In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Input, callbacks, Model, layers, optimizers
import numpy as np
import os
# os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import cv2
from math import ceil
import random
import datetime

# from util_gray import DataGenerator
print(tf.__version__)
physical_devices = tf.config.experimental.list_physical_devices('GPU')
assert len(physical_devices) > 0, "Not enough GPU hardware devices available"
tf.config.experimental.set_memory_growth(physical_devices[0], True)
assert tf.config.experimental.get_memory_growth(physical_devices[0]) == True

  from ._conv import register_converters as _register_converters


2.0.1


In [2]:
num_layers = 1  # 2
training = False
dropout = 0.5
char_txt_path = './char_std.txt'
model_path = './ctnn_epoch.001-loss.0.78-val_loss.0.80.h5'


with open(char_txt_path, mode='r', encoding='UTF-8') as wf:
    ff = wf.readlines()
    num_classes = len(ff)
print(num_classes)

char_list = []
with open(char_txt_path, 'r', encoding='UTF-8') as f:
    ff = f.readlines()
    for i, char in enumerate(ff):
        char = char.strip()
        char_list.append(char)
    
char2id = {j:i for i, j in enumerate(char_list)}
id2char = {i:j for i, j in enumerate(char_list)}

6796


In [3]:
def get_angles(pos, i, d_model):
    angle_rates = 1 / np.power(10000,
                               (2 * (i // 2)) / np.float32(d_model))
    return pos * angle_rates

def get_position_embedding(sentence_length, d_model):
    angle_rads = get_angles(np.arange(sentence_length)[:, np.newaxis],
                            np.arange(d_model)[np.newaxis, :],
                            d_model)
    # position_embedding.shape: [sentence_length, d_model]
    position_embedding = np.zeros_like(angle_rads)
    # sines.shape: [sentence_length, d_model / 2]
    # cosines.shape: [sentence_length, d_model / 2]
    sines = np.sin(angle_rads[:, 0::2])
    position_embedding[:, 0::2] = sines
    cosines = np.cos(angle_rads[:, 1::2])
    position_embedding[:, 1::2] = cosines
    
    # position_embedding.shape: [1, sentence_length, d_model]
    position_embedding = position_embedding[np.newaxis, ...]
    
    return tf.cast(position_embedding, dtype=tf.float32)

class MultiHeadAttention(keras.layers.Layer):
    """
    理论上:
    x -> Wq0 -> q0
    x -> Wk0 -> k0
    x -> Wv0 -> v0
    
    实战中:
    q -> Wq0 -> q0
    k -> Wk0 -> k0
    v -> Wv0 -> v0
    
    实战中技巧：
    q -> Wq -> Q -> split -> q0, q1, q2...
    """
    def __init__(self, d_model, num_heads):
        super(MultiHeadAttention, self).__init__()
        self.num_heads = num_heads
        self.d_model = d_model
        assert self.d_model % self.num_heads == 0
        
        self.depth = self.d_model // self.num_heads
        
        self.WQ = keras.layers.Dense(self.d_model)
        self.WK = keras.layers.Dense(self.d_model)
        self.WV = keras.layers.Dense(self.d_model)
        
        self.dense = keras.layers.Dense(self.d_model)
        
    # def scaled_dot_product_attention(q, k, v, mask):
    def scaled_dot_product_attention(self, q, k, v):
        """
        Args:
        - q: shape == (..., seq_len_q, depth)
        - k: shape == (..., seq_len_k, depth)
        - v: shape == (..., seq_len_v, depth_v)
        - seq_len_k == seq_len_v
        - mask: shape == (..., seq_len_q, seq_len_k)
        Returns:
        - output: weighted sum
        - attention_weights: weights of attention
        """

        # matmul_qk.shape: (..., seq_len_q, seq_len_k)
        matmul_qk = tf.matmul(q, k, transpose_b = True)

        dk = tf.cast(tf.shape(k)[-1], tf.float32)
        scaled_attention_logits = matmul_qk / tf.math.sqrt(dk)

    #     if mask is not None:
    #         # 使得在softmax后值趋近于0
    #         scaled_attention_logits += (mask * -1e9)

        # attention_weights.shape: (..., seq_len_q, seq_len_k)
        attention_weights = tf.nn.softmax(
            scaled_attention_logits, axis = -1)

        # output.shape: (..., seq_len_q, depth_v)
        output = tf.matmul(attention_weights, v)

        return output, attention_weights
    
    def split_heads(self, x, batch_size):
        # x.shape: (batch_size, seq_len, d_model)
        # d_model = num_heads * depth
        # x -> (batch_size, num_heads, seq_len, depth)
        
        x = tf.reshape(x,
                       (batch_size, -1, self.num_heads, self.depth))
        return tf.transpose(x, perm=[0, 2, 1, 3])
    
#     def call(self, q, k, v, mask):
    def call(self, q, k, v):
        batch_size = tf.shape(q)[0]
        
        q = self.WQ(q) # q.shape: (batch_size, seq_len_q, d_model)
        k = self.WK(k) # k.shape: (batch_size, seq_len_k, d_model)
        v = self.WV(v) # v.shape: (batch_size, seq_len_v, d_model)
        
        # q.shape: (batch_size, num_heads, seq_len_q, depth)
        q = self.split_heads(q, batch_size)
        # k.shape: (batch_size, num_heads, seq_len_k, depth)
        k = self.split_heads(k, batch_size)
        # v.shape: (batch_size, num_heads, seq_len_v, depth)
        v = self.split_heads(v, batch_size)
        
        # scaled_attention_outputs.shape: (batch_size, num_heads, seq_len_q, depth)
        # attention_weights.shape: (batch_size, num_heads, seq_len_q, seq_len_k)
        #                                                  scaled_dot_product_attention(q, k, v, mask)
        scaled_attention_outputs, attention_weights = self.scaled_dot_product_attention(q, k, v)

        
        # scaled_attention_outputs.shape: (batch_size, seq_len_q, num_heads, depth)
        scaled_attention_outputs = tf.transpose(
            scaled_attention_outputs, perm = [0, 2, 1, 3])
        # concat_attention.shape: (batch_size, seq_len_q, d_model)
        concat_attention = tf.reshape(scaled_attention_outputs,
                                      (batch_size, -1, self.d_model))
        
        # output.shape : (batch_size, seq_len_q, d_model)
        output = self.dense(concat_attention)
        
        return output, attention_weights
    
def feed_forward_network(d_model, dff):
    # dff: dim of feed forward network.
    return keras.Sequential([
        keras.layers.Dense(dff, activation='relu'),
        keras.layers.Dense(d_model)
    ])


class EncoderLayer(keras.layers.Layer):
    """
    x -> self attention -> add & normalize & dropout
      -> feed_forward -> add & normalize & dropout
    """
    def __init__(self, d_model, num_heads, dff, rate=0.1):
        super(EncoderLayer, self).__init__()
        self.mha = MultiHeadAttention(d_model, num_heads)
        self.ffn = feed_forward_network(d_model, dff)
        
        self.layer_norm1 = keras.layers.LayerNormalization(
            epsilon = 1e-6)
        self.layer_norm2 = keras.layers.LayerNormalization(
            epsilon = 1e-6)
        
        self.dropout1 = keras.layers.Dropout(rate)
        self.dropout2 = keras.layers.Dropout(rate)
    
#     def call(self, x, training, encoder_padding_mask):
    def call(self, x, training):
        # x.shape          : (batch_size, seq_len, dim=d_model)
        # attn_output.shape: (batch_size, seq_len, d_model)
        # out1.shape       : (batch_size, seq_len, d_model)
#         attn_output, _ = self.mha(x, x, x, encoder_padding_mask)
        attn_output, _ = self.mha(x, x, x)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layer_norm1(x + attn_output)
        
        # ffn_output.shape: (batch_size, seq_len, d_model)
        # out2.shape      : (batch_size, seq_len, d_model)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        out2 = self.layer_norm2(out1 + ffn_output)
        
        return out2


# In[5]:


def conv(x, filters=64, kernel_size=(3,3), strides=1, padding='same', activation='relu'):
    x = layers.Conv2D(filters, kernel_size, strides, padding)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation(activation)(x)
    return x

def net(inputs):
    x = conv(inputs)
    x = layers.MaxPooling2D(pool_size=(2,2), strides=2)(x)
    x = conv(x, filters=128)
    x = layers.MaxPooling2D(pool_size=(2,2), strides=2)(x)
    x = conv(x, filters=256)
    x = conv(x, filters=256)
    x = layers.MaxPooling2D(pool_size=(2,1), strides=(2,1))(x)
    x = conv(x, filters=512)
    x = conv(x, filters=512)
    x = layers.MaxPooling2D(pool_size=(2,1), strides=(2,1))(x)
#     x = conv(x, filters=512, kernel_size=(3,3), strides=(2,2))
    x = conv(x, filters=512, kernel_size=(3,3), strides=(2,2), activation='sigmoid')
    return x

def map_to_sequence(x):
    shape = x.get_shape().as_list()
    assert shape[1]==1
    return keras.backend.squeeze(x, axis=1)

class EncoderModel(keras.layers.Layer):
    def __init__(self, num_layers=2, d_model=512, num_heads=8,
                 dff=1024, time_step=35, rate=0.1):
        super(EncoderModel, self).__init__()
        self.num_layers = num_layers
        self.d_model = d_model
        self.num_heads = num_heads
        self.dff = dff
        self.time_step = time_step
        self.rate = rate
        
        self.map_to_sequence = layers.Lambda(map_to_sequence)
        # position_embedding.shape: (1, max_length, d_model)
        self.position_embedding = get_position_embedding(self.time_step, self.d_model)
        self.dropout = layers.Dropout(self.rate)

        self.encoder_layers = [
            EncoderLayer(self.d_model, self.num_heads, self.dff, self.rate)
            for _ in range(self.num_layers)]
        
    def call(self, x, training):
        # x.shape: (batch_size, 1, time_step, d_model)
        x = self.map_to_sequence(x)
        # x.shape: (batch_size, time_step, d_model)
        x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        x += self.position_embedding
        x = self.dropout(x, training = training)
        
        for i in range(self.num_layers):
            x = self.encoder_layers[i](x, training)
        
        # x.shape: (batch_size, time_step, d_model)
        return x

# inputs = Input(name='input_image', shape=(32,280,3))
inputs = Input(name='input_image', shape=(32,None,1))

y = net(inputs)

y = EncoderModel(num_layers=num_layers)(y, training)

print(y.shape)

y = layers.Dropout(dropout)(y, training=training)
y = layers.Dense(num_classes, activation='softmax', name='FC_1')(y)

predict_model = Model(inputs, y)

predict_model.load_weights(filepath=model_path, by_name=True)

    def call(self, x, training):
        # x.shape          : (batch_size, seq_len, dim=d_model)
        # attn_output.shape: (batch_size, seq_len, d_model)
        # out1.shape       : (batch_size, seq_len, d_model)
#         attn_output, _ = self.mha(x, x, x, encoder_padding_mask)
        attn_output, _ = self.mha(x, x, x)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layer_norm1(x + attn_output)
        
        # ffn_output.shape: (batch_size, seq_len, d_model)
        # out2.shape      : (batch_size, seq_len, d_model)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        out2 = self.layer_norm2(out1 + ffn_output)
        
        return out2

This may be caused by multiline strings or comments not indented at the same level as the code.
    def call(self, x, training):
        # x.shape          : (batch_size, seq_len, dim=d_model)
        # attn_output.shape: (batch_size, seq_len, d_

In [4]:
img_root = '/home/shaoran/work/jupyter_notebook/Crnn/ocr_test_rec'
image_names = os.listdir(img_root)
image_pathes = []
for name in image_names:
    name = os.path.join(img_root, name)
    image_pathes.append(name)

print(image_pathes[:3])

['/home/shaoran/work/jupyter_notebook/Crnn/ocr_test_rec/2390_3394_1186_2130_2043_3534_3557_553_.jpg', '/home/shaoran/work/jupyter_notebook/Crnn/ocr_test_rec/3495_828_1696_1210_2512_1913_1841_2866_.jpg', '/home/shaoran/work/jupyter_notebook/Crnn/ocr_test_rec/524_4650_2495_.jpg']


In [5]:
# img_mask_32x70 = []
# w0 = []
# img_mask_32x280 = []
# w1 = []
# img_mask_32xlonger = []
# w2 = []

In [6]:
#  for path in image_pathes:
#     img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
#     h,w = img.shape
# #     print(h,w,c)
#     scale = h / 32
#     ww = int(w * scale)
#     img = cv2.resize(img, (ww, 32))
#     img = np.expand_dims(img, axis=-1)
#     if ww > 280:
#         img_mask_32xlonger.append(img)
#         w2.append(ww)
#     elif ww > 70:
#         img_mask_32x280.append(img)
#         w1.append(ww)
#     else:
#         img_mask_32x70.append(img)
#         w0.append(ww)

In [7]:
# print(len(img_mask_32xlonger))
# print(len(img_mask_32x280))
# print(len(img_mask_32x70))

In [8]:
# %%time
# img_ = cv2.imread(image_pathes[0], cv2.IMREAD_GRAYSCALE)
# img_ = cv2.resize(img_, (280,32))
# i_ = np.expand_dims(img_, axis=0)
# i_ = np.expand_dims(i_, axis=-1)

In [9]:
# %%time
# out_ = predict_model.predict(i_)

In [11]:
%%time

# inference time: 0.391s 0.4s

# w_0 = max(max(w0), max(w1), max(w2))
# img_mask = img_mask_32x70 + img_mask_32x280 + img_mask_32xlonger
# print(len(img_mask))

# mask_0 = np.full((len(img_mask), 32, w_0, 1), fill_value=255)
# print('1')
# for i, img in enumerate(img_mask):
#     mask_0[i,:, :img.shape[1], :] = img
# print('2')


for path in image_pathes:
    img = cv2.imread(image_pathes[0], cv2.IMREAD_GRAYSCALE)
    img = np.expand_dims(img, axis=0)
    img = np.expand_dims(img, axis=-1)
    out = predict_model.predict(img)
    out_0 = keras.backend.ctc_decode(out, input_length=[out.shape[1]]*out.shape[0])

(1, 35, 6796)
Instructions for updating:
Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead.
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 35, 6796)
(1, 3