In [1]:
import os
import re
import zipfile

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from PIL import Image
import ast
import json
from glob import glob

import tensorflow as tf
from tensorflow import keras
import tensorflow_datasets as tfds

In [2]:
#!pip install tensorflow_datasets

In [3]:
#os.environ["CUDA_VISIBLE_DEVICES"] = "-1" #CPU

In [4]:
tf.__version__, tf.test.is_gpu_available()

Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.


RuntimeError: CUDA runtime implicit initialization on GPU:0 failed. Status: out of memory

In [None]:
max_sequence = 9
blank_num = -1
characters_0 = '0123456789'
characters_alphabet = 'ABCDEFGHIJKLMNPQRSTUVWXYZ-'
characters_1 = '가나다라마바사아자하거너더러머버서어저허고노도로모보소오조호구누두루무부수우주'
characters_1d = 'GNDLMBSOJHgndlmbsojhgNDLMBSOJHGNDLMBSUJ'
characters_2 = '대배해제기교표외인천국합육공영준협정울경강원충북운남전산광'
characters_2d = 'DBHJGGPYICGHUGYJHJUGKWCBUNJSG'
characters_3 = '_       abcd  '
plate_characters = characters_0 + characters_alphabet + characters_1 + characters_2 + characters_3
num_classes = len(plate_characters)
num_classes

In [None]:
padded_image_shape = (16*6, 16*6*3)
EOS = num_classes - 1
SOS = EOS - 1
head_n = 8
l1 = 1e-8
activation = 'relu'#'selu' is not converted to tflite
kernel_init = tf.initializers.he_normal()
path_weight = "model/LPR"
max_data_m = 10
SOS, EOS

In [None]:
path_synthetic = '/home/mvlab/Downloads/plate_generator/gen/'
path_label = path_synthetic + 'label.csv'

In [None]:
os.path.isdir(path_synthetic), os.path.isfile(path_label)

In [None]:
np.set_printoptions(precision=3)

In [None]:
df = pd.read_csv(path_label, sep=' ')
df.shape

In [None]:
df.columns

In [None]:
df.head()

In [None]:
df['type'].unique()

In [None]:
df.iloc[0]

In [None]:
def read_synthetic(max_m=10000):
    df = pd.read_csv(path_label, sep=' ')
    df = df[:max_m]
    m = len(df)
    x0 = df['x0']
    x1 = df['x1']
    x2 = df['x2']
    x3 = df['x3']
    y0 = df['y0']
    y1 = df['y1']
    y2 = df['y2']
    y3 = df['y3']
    
    min_x = np.minimum(df['x0'], df['x3'])
    min_y = np.minimum(df['y0'], df['y1'])
    max_x = np.maximum(df['x1'], df['x2'])
    max_y = np.maximum(df['y2'], df['y3'])
    box_ratio = (max_x - min_x) / (max_y - min_y)
    shear_y0 = -box_ratio * (y2 - y3) / (x2 - x3)
    shear_y1 = -box_ratio * (y1 - y0) / (x1 - x0)
    shear_x0 = -box_ratio * (x3 - x0) / (y3 - y0)
    shear_x1 = -box_ratio * (x2 - x1) / (y2 - y1)
    shear_y = (shear_y0 + shear_y1) / 2
    shear_x = (shear_x0 + shear_x1) / 2
    df['min_x'] = min_x
    df['min_y'] = min_y
    df['max_x'] = max_x
    df['max_y'] = max_y
    df['shear_x'] = shear_x
    df['shear_y'] = shear_y

    plate_chars = plate_characters
    bbox = df[['min_x', 'min_y', 'max_x', 'max_y', 'type']].values
    vertex = df[['x0', 'y0', 'x1', 'y1', 'x2', 'y2', 'x3', 'y3','type']].values
    cxy_col = [str(i) for i in range(18)]
    char_cxy = df[cxy_col].values
    plate_type = df['type'].values

    text_len = max_sequence
    m = len(df)
    class_nums = np.zeros((m, text_len)) - 1
    for i in range(m):
        text0 = df.loc[i, 'text']
        for j in range(len(text0)):
            class_num = plate_chars.index(text0[j])
            class_nums[i, j] = class_num
    print('class_nums', class_nums.shape)
    print('char_cxy', char_cxy.shape)
    
    label = np.concatenate((vertex, class_nums, char_cxy), axis=-1)    
    print('label', label.shape)
    
    img_path_list = list(path_synthetic + df['path'].values)
    return img_path_list, label

In [None]:
paths, labels = read_synthetic(10000)
print('labels', len(labels), len(paths))
print(labels[0])

In [None]:
i_index = np.arange(10)
np.random.shuffle(i_index)
i_index

In [None]:
x_list = []
y_list = []
i_index = np.arange(len(paths))
np.random.shuffle(i_index)
for i in i_index:
    path_img = paths[i]
    label = labels[i]
    if os.path.isfile(path_img):
        img = Image.open(path_img)
        arr = np.array(img)
        
        
        img_h, img_w, img_c = arr.shape
        #print('label', label.shape)
        coord = label[:8]
        type_text = label[8:8+1+max_sequence]        
        char_cxy = label[8+1+max_sequence:]
        
        coord_2d = np.reshape(coord, [-1, 2])
        coord_2d_norm = coord_2d / (np.array((img_w, img_h), np.float))
        coord_norm = np.reshape(coord_2d_norm, [-1])
        
        char_cxy_2d = np.reshape(char_cxy, [-1, 2])
        char_cxy_2d_norm = char_cxy_2d / (np.array((img_w, img_h), np.float))
        char_cxy_norm = np.reshape(char_cxy_2d_norm, [-1])
        
        label_norm = np.concatenate((coord_norm, type_text, char_cxy_norm), -1)
        
        x_list.append(arr)
        y_list.append(label_norm)
        if len(x_list)%100 == 0 :
            print(len(paths), len(x_list))
        if len(x_list)>max_data_m:
            break
print('x_list', len(x_list), len(y_list))

In [None]:
plt.imshow(x_list[0])

In [None]:
def visualize_plate(
    image, y, figsize=(12, 12), linewidth=1, color=[0, 0, 1]
):
    """Visualize Detections"""    
    img_h, img_w, img_c = image.shape
    plt.figure(figsize=figsize)
    plt.axis("off")
    plt.imshow(image)
    ax = plt.gca()
    
    vertices_norm = y[:8]    
    plate_type = y[8]
    plate_text = y[9:]
    vertices_norm_2d = np.reshape(vertices_norm, [-1, 2])
    vertices_2d = vertices_norm_2d * np.array((img_w, img_h), np.float)
    
    x0, y0, x1, y1, x2, y2, x3, y3 = np.reshape(vertices_2d, [-1])
    
    w, h = x2 - x1, y2 - y1    
    color = [1,0,0]
    linewidth = 10
    plt.plot(x0, y0, x1, y1, 'go-', linewidth=linewidth)
    plt.plot(x1, y1, x2, y2, 'ro--', linewidth=linewidth)
    plt.plot(x2, y2, x3, y3, marker = 'o', linewidth=linewidth)
    plt.plot(x3, y3, x0, y0, marker = 'o', linewidth=linewidth)
        
    txt = str.format('(%d)' % (plate_type)) + str(plate_text)
    ax.text(x0, y0, txt, bbox={"facecolor": [1,1,0], "alpha": 0.4}, clip_box=ax.clipbox, clip_on=True,)
        
    plt.show()
    return ax

In [None]:
def display_data(X, Y, stride=1):
    for i in range(len(X)):
        if i%stride==0:            
            ax = visualize_plate(X[i], Y[i])

In [None]:
display_data(x_list, y_list, stride=1000)

In [None]:
def shift_matrix(x, y):
    z = x * 0
    o = z + 1
    mat = tf.stack([o, z, x, z, o, y], -1)
    row = tf.stack([z, z, z+1], -1)
    mat = tf.concat((mat, row), -1)
    mat = tf.reshape(mat, [-1, 3, 3])
    return mat

def scale_matrix(x, y):
    z = x * 0
    mat = tf.stack([x, z, z, z, y, z], -1)
    row = tf.stack([z, z, z + 1], -1)
    mat = tf.concat((mat, row), -1)
    mat = tf.reshape(mat, [-1, 3, 3])
    return mat

def rotate_matrix(radian):
    c = tf.cos(radian)
    s = tf.sin(radian)
    z = c * 0
    mat = tf.stack([c, -s, z, s, c, z], -1)
    row = tf.stack([z, z, z + 1], -1)
    mat = tf.concat((mat, row), -1)
    mat = tf.reshape(mat, [-1, 3, 3])
    return mat


def shear_x_matrix(radian_x):
    x = tan(radian_x)
    z = x * 0
    o = z + 1
    mat = tf.stack([o, x, z, z, o, z], -1)
    row = tf.stack([z, z, z + 1], -1)
    mat = tf.concat((mat, row), -1)
    mat = tf.reshape(mat, [-1, 3, 3])
    return mat

def shear_y_matrix(radian_y):
    y = tan(radian_y)
    z = y * 0
    o = z + 1
    mat = tf.stack([o, z, z, y, o, z], -1)
    row = tf.stack([z, z, z + 1], -1)
    mat = tf.concat((mat, row), -1)
    mat = tf.reshape(mat, [-1, 3, 3])
    return mat

def shear_matrix(radian_x, radian_y):
    
    x = tan(radian_x)
    y = tan(radian_y)
    z = x * 0
    o = z + 1
    mat = tf.stack([o, x, z, y, o, z], -1)
    row = tf.stack([z, z, z + 1], -1)
    mat = tf.concat((mat, row), -1)
    mat = tf.reshape(mat, [-1, 3, 3])
    return mat


In [None]:
def convert_norm_to_uv(coord):
    return coord * 2 - 1 

def convert_uv_to_norm_to(coord):
    return coord + 1 / 2


def transform_uv(uv, mat):
    #coord : (m, n, 2)
    #theta : (m, 2, 3)    
    inv_mat = tf.linalg.inv(mat)        
    xyo = tf.concat((uv, 1 + 0 * uv[:, :, :1]), -1)
    new_xy = tf.einsum('mrc,msc->mrs', inv_mat, xyo)
    new_xy = tf.transpose(new_xy, [0, 2, 1])
    new_xy = new_xy[:, :, :2]
    return new_xy


def transform_xy(xy, theta):
    m = tf.shape(xy)[0]
    theta = tf.linalg.inv(theta)
    theta = theta[:, :2]
    # xy [0, 1] > [-1, 1]
    xy = (xy - 0.5) * 2
    theta = tf.reshape(theta, [-1, 2, 3])
    xy = tf.reshape(xy, [m, -1, 2])
    xyo = tf.concat((xy, 1 + 0 * xy[:, :, :1]), -1)
    new_xy = tf.einsum('mrc,msc->mrs', theta, xyo)
    new_xy = tf.transpose(new_xy, [0, 2, 1])

    return new_xy

In [None]:
#y = sign(x) = -1 if x < 0; 0 if x == 0; 1 if x > 0.
def sign(v):
    return tf.cast(v > 0, tf.float32) + -1 * tf.cast(v < 0, tf.float32)

def tan(v):
    return tf.sin(v)/tf.cos(v)

In [None]:
def get_x1(t1, t2, width, height):
    a = sign(t2) * width - height * tf.sin(t2)/tf.cos(t2)
    b = (sign(t2) * tf.cos(t1) * tf.cos(t2) - sign(t1) * tf.sin(t1) * tf.sin(t2)) / (tf.cos(t1)*tf.cos(t2))
    x1 = a / b
    return x1

def get_v1_v2(t1, t2, width, height):
    x1 = get_x1(t1, t2, width, height)
    #x1 = get_x1_new(t1, t2, width, height)
    y1 = x1 / tf.cos(t1) * tf.sin(t1)
    x2 = width * sign(t2) - sign(t2) * x1
    y2 = height - sign(t1) * y1
    return x1,y1,x2,y2

def get_wH(t1, t2, width, height):
    x1, y1, x2, y2 = get_v1_v2(t1, t2, width, height)
    w = x1/tf.cos(t1)
    h = y2/tf.cos(t2)
    pi = 3.141592653589793
    right = 90 * pi / 180
    H = tf.sin(right - t1 - t2) * h
    return w / width, H / height

def get_align_scale_matrix(radian_x, radian_y, width, height):
    min_angle = 0.0001
    rotate_mat = rotate_matrix(-radian_y)
    shear_x_mat = shear_x_matrix(-radian_x - radian_y)
    
    is_zero_radian_x = tf.cast(tf.abs(radian_x) < min_angle, tf.float32)
    is_zero_radian_y = tf.cast(tf.abs(radian_y) < min_angle, tf.float32)
    radian_x = is_zero_radian_x * sign(radian_x) * radian_x * 0+min_angle + (1-is_zero_radian_x) * radian_x
    radian_y = is_zero_radian_y * sign(radian_y) * radian_y * 0+min_angle + (1-is_zero_radian_y) * radian_y
    radian_x = tf.where(tf.abs(radian_x) < min_angle, sign(radian_x) * radian_x * 0+min_angle, radian_x)
    radian_y = tf.where(tf.abs(radian_y) < min_angle, sign(radian_y) * radian_y * 0+min_angle, radian_y)
    w, h = get_wH(-radian_y, -radian_x, width, height) 
    scale_down_rotate_o = scale_matrix(w, h)

    rot_mat = tf.matmul(rotate_mat, shear_x_mat)
    rot_scale_mat = tf.matmul(rot_mat, scale_down_rotate_o) #[2,3,3] vs [4,3,3]

    return rot_scale_mat

In [None]:
x = tf.linspace(-1.0, 1.0, 6, name='linspace_x')
y = tf.linspace(-1.0, 1.0, 3, name='linspace_y')
x_t, y_t = tf.meshgrid(x, y, name='meshgrid')
x_t, y_t

In [None]:
x = 2 * (tf.range(6)/5 - 0.5)
x = tf.reshape(x, [1, -1])
x = tf.tile(x, [3, 1])
x

In [None]:
def affine_grid_generator(height, width, theta):
    num_batch = tf.shape(theta)[0]
    
    x = tf.linspace(-1.0, 1.0, width)
    y = tf.linspace(-1.0, 1.0, height)
    x_t, y_t = tf.meshgrid(x, y)
    x_t_flat = tf.reshape(x_t, [-1])
    y_t_flat = tf.reshape(y_t, [-1])

    ones = tf.ones_like(x_t_flat)
    sampling_grid = tf.stack([x_t_flat, y_t_flat, ones])  # (3, h*w)
    sampling_grid = tf.expand_dims(sampling_grid, axis=0)
    # sampling_grid = tf.tile(sampling_grid, tf.stack([num_batch, 1, 1]))#(num_batch, 3, h*w)
    sampling_grid = tf.tile(sampling_grid, [num_batch, 1, 1])  # (num_batch, 3, h*w)
    theta = tf.cast(theta, tf.float32)
    sampling_grid = tf.cast(sampling_grid, tf.float32)

    batch_grids = tf.matmul(theta, sampling_grid)  # (m, 2, 3)@(m, 3, h*w)=(m,2,h*w)
    batch_grids = tf.reshape(batch_grids, [num_batch, 2, height, width])
    return batch_grids


def get_pixel_value(img, x, y):
    # img (m,h,w,c)
    # x,y (m,h,w)
    shape = tf.shape(x)
    m = shape[0]
    h = shape[1]
    w = shape[2]
    batch_idx = tf.range(0, m)
    batch_idx = tf.reshape(batch_idx, [m, 1, 1])
    b = tf.tile(batch_idx, [1, h, w])

    indices = tf.stack([b, y, x], axis=3)  # (m,h,w,3)

    return tf.gather_nd(img, indices)


def bilinear_sampler(img, batch_grids):
    # batch_grids (m, 2, h, w)
    # img (m,h,w,c)
    uv_x = batch_grids[:, 0]
    uv_y = batch_grids[:, 1]
    H = tf.shape(img)[1]
    W = tf.shape(img)[2]
    max_y = tf.cast(H - 1, tf.float32)
    max_x = tf.cast(W - 1, tf.float32)
    # x [-1, 1]
    x = 0.5 * ((uv_x + 1.0) * max_x)
    y = 0.5 * ((uv_y + 1.0) * max_y)

    # grab 4 nearest corner points for each (x_i, y_i)
    x0 = tf.floor(x)  # precision bad?
    x1 = x0 + 1
    y0 = tf.floor(y)
    y1 = y0 + 1

    # clip out of boundary index
    x0 = tf.clip_by_value(x0, 0, max_x)
    x1 = tf.clip_by_value(x1, 0, max_x)
    y0 = tf.clip_by_value(y0, 0, max_y)
    y1 = tf.clip_by_value(y1, 0, max_y)

    # deltas
    wa = (x1 - x) * (y1 - y)
    wb = (x1 - x) * (y - y0)
    wc = (x - x0) * (y1 - y)
    wd = (x - x0) * (y - y0)

    wa = tf.expand_dims(wa, -1)
    wb = tf.expand_dims(wb, -1)
    wc = tf.expand_dims(wc, -1)
    wd = tf.expand_dims(wd, -1)

    x0 = tf.cast(x0, tf.int32)
    x1 = tf.cast(x1, tf.int32)
    y0 = tf.cast(y0, tf.int32)
    y1 = tf.cast(y1, tf.int32)

    Ia = get_pixel_value(img, x0, y0)
    Ib = get_pixel_value(img, x0, y1)
    Ic = get_pixel_value(img, x1, y0)
    Id = get_pixel_value(img, x1, y1)

    out = tf.add_n([wa*Ia, wb*Ib, wc*Ic, wd*Id])
    #out = wa * Ia + wb * Ib + wc * Ic + wd * Id

    return out


def sampling(net, theta, dst_h, dst_w):
    theta = tf.reshape(theta, [-1, 2, 3])
    #h = tf.shape(net)[1]
    #w = tf.shape(net)[2]
    batch_grids = affine_grid_generator(dst_h, dst_w, theta)
    out = bilinear_sampler(net, batch_grids)
    return out

In [None]:
def get_box_center_and_size(bbox):
    x0, y0, x1, y1 = tf.split(bbox, 4, -1)
    w = x1 - x0
    h = y1 - y0
    cx = (x1 + x0) / 2
    cy = (y1 + y0) / 2
    return cx, cy, w, h

In [None]:
X = tf.constant(x_list[:1])
Y = tf.constant(y_list[:1])
X = tf.cast(X, tf.float32)
X.shape, Y.shape, Y[0]

In [None]:
def get_theta_from_coords(coords):
    x0 = coords[:, 0, 0]
    y0 = coords[:, 0, 1]
    x1 = coords[:, 1, 0]
    y1 = coords[:, 1, 1]
    x2 = coords[:, 2, 0]
    y2 = coords[:, 2, 1]
    x3 = coords[:, 3, 0]
    y3 = coords[:, 3, 1]
        
    tx = (x0 + x1 + x2 + x3)/4
    ty = (y0 + y1 + y2 + y3)/4
    #scale_x = ((x1 - x0) + (x2 - x3))/2    
    #scale_y = ((y3 - y0) + (y2 - y1))/2
    shear_x = -((x0 - x3) / (y0 - y3) + (x1 - x2) / (y1 - y2)) / 2
    shear_y = -((y1 - y0) / (x1 - x0) + (y2 - y3) / (x2 - x3)) / 2
    shear_x = tf.math.atan(shear_x)
    shear_y = tf.math.atan(shear_y)
    
    x_min = tf.reduce_min(coords[:, :, 0], 1)
    x_max = tf.reduce_max(coords[:, :, 0], 1)
    y_min = tf.reduce_min(coords[:, :, 1], 1)
    y_max = tf.reduce_max(coords[:, :, 1], 1)
    w = x_max - x_min
    h = y_max - y_min
    scale_x = w/2
    scale_y = h/2
    
    return tx, ty, scale_x, scale_y, shear_x, shear_y

In [None]:
def generate_transform_matrix(tx, ty, sx, sy, shear_x, shear_y):    
    shift_mat = shift_matrix(tx, ty)
    scale_mat = scale_matrix(sx, sy)    
    
    w = sx * 2
    h = sy * 2
    align_scale_mat = get_align_scale_matrix(shear_x, shear_y, w, h)    
    tm = tf.matmul(shift_mat, align_scale_mat)
    tm = tf.matmul(tm, scale_mat)
    tm = tf.cast(tm, tf.float32)
    return tm

def convert_transform_matrix(tx, ty, sx, sy, shear_x, shear_y):    
    shift_mat = shift_matrix(tx, ty)
    scale_mat = scale_matrix(sx, sy)    
    shear_mat = shear_matrix(shear_x, shear_y)
    
    tm = tf.matmul(shift_mat, scale_mat)
    tm = tf.matmul(tm, shear_mat)
    tm = tf.cast(tm, tf.float32)
    return tm

In [None]:
def get_align_matrix(vertices_uv):
    cx, cy, sx, sy, shear_x, shear_y = get_theta_from_coords(vertices_uv)        
    transform_mat = generate_transform_matrix(cx, cy, sx, sy, shear_x, shear_y)
    return transform_mat

In [None]:
def get_augment_transform_matrix(m, delta):
    z = tf.zeros(m)    
    shear_y = tf.random.normal(tf.shape(z), stddev=delta)
    shear_x = tf.random.normal(tf.shape(z), stddev=delta)
    ty = tf.random.normal(tf.shape(z), stddev=delta)
    tx = tf.random.normal(tf.shape(z), stddev=delta)

    #width_scope = [0.07, 0.3]  # from [0.156, 0.116]
    sx = tf.random.uniform(tf.shape(z), minval=1, maxval=1 + delta)
    sy = tf.random.uniform(tf.shape(z), minval=1, maxval=1 + delta)
    sx += tf.abs(tx)
    sy += tf.abs(ty)

    #thetas = tf.stack([tx, ty, sx, sy, shear_x, shear_y], 1)
    return tx, ty, sx, sy, shear_x, shear_y

In [None]:
def get_aug_matrix(vertices_uv, delta):    
    transform_mat = get_align_matrix(vertices_uv)
    m = tf.shape(vertices_uv)[0]
    
    cx, cy, sx, sy, shear_x, shear_y = get_augment_transform_matrix(m, delta)    
    transform_mat = convert_transform_matrix(cx, cy, sx, sy, shear_x, shear_y)
    
    return transform_mat

In [None]:
Y = tf.cast(Y, tf.float32)
vertices = Y[:, :8]
plate_type = Y[:, 8]
plate_text = Y[:, 9:9 + max_sequence]
char_cxy = Y[:, 9 + max_sequence:9 + max_sequence + max_sequence*2]

vertices = tf.reshape(vertices, [-1, 4, 2])
char_cxy = tf.reshape(char_cxy, [-1, max_sequence, 2])
vertices_uv = convert_norm_to_uv(vertices)
char_cxy_uv = convert_norm_to_uv(char_cxy)
align_mat = get_align_matrix(vertices_uv)
aug_mat = get_aug_matrix(vertices_uv, delta=0.1)
transform_mat = tf.matmul(align_mat, aug_mat)
transform_mat.shape, transform_mat.dtype

In [None]:
x_sampled = sampling(X, transform_mat[:, :2], 200, 400)
x_sampled.shape

In [None]:
transformed_vertices_uv = transform_uv(vertices_uv, transform_mat)
transformed_char_cxy_uv = transform_uv(char_cxy_uv, transform_mat)
transformed_vertices_uv.shape, transformed_char_cxy_uv.shape, transformed_vertices_uv[0], transformed_char_cxy_uv[0]

In [None]:
y_cx, y_cy, y_sx, y_sy, y_shear_x, y_shear_y = get_theta_from_coords(transformed_vertices_uv)        
tf.stack((y_cx, y_cy, y_sx, y_sy, y_shear_x, y_shear_y), -1)

In [None]:
transformed_char_cxy_uv = transform_uv(char_cxy_uv, transform_mat)
transformed_char_cxy_uv.shape, transformed_char_cxy_uv[0]

In [None]:
x_sampled_img = x_sampled.numpy().astype(np.uint8)
x_sampled_img.shape

In [None]:
x_sample_concat = np.concatenate((x_sampled_img), axis=1)
plt.figure(figsize=(15,10))
plt.imshow(x_sample_concat)

In [None]:
aug_align_mat = generate_transform_matrix(y_cx, y_cy, y_sx, y_sy, y_shear_x, y_shear_y)
x_aug_aligned_sampled = sampling(x_sampled, aug_align_mat[:, :2], 200, 400)
x_aug_aligned_sampled.shape
x_aug_aligned_sampled_img = x_aug_aligned_sampled.numpy().astype(np.uint8)
x_sample_concat = np.concatenate((x_aug_aligned_sampled_img[:10]), axis=1)
plt.figure(figsize=(15,10))
plt.imshow(x_sample_concat)

In [None]:
ax = visualize_plate(x_list[0], y_list[0])

In [None]:
def normalilze_4d(x):
    mini = tf.reduce_min(x, [1, 2], True)
    maxi = tf.reduce_max(x, [1, 2], True)
    return (x - mini) / (maxi - mini)

In [None]:
def preprocess_data(X, label):
    
    label = tf.expand_dims(label, 0)
    X = tf.expand_dims(X, 0)
    vertices = label[:, :8]
    plate_type = label[:, 8]
    plate_text = label[:, 9:9 + max_sequence]
    char_cxy = label[:, 9 + max_sequence:9 + max_sequence + max_sequence*2]

    vertices = tf.reshape(vertices, [-1, 4, 2])
    char_cxy = tf.reshape(char_cxy, [-1, max_sequence, 2])
    vertices_uv = convert_norm_to_uv(vertices)
    char_cxy_uv = convert_norm_to_uv(char_cxy)
    align_mat = get_align_matrix(vertices_uv)
    aug_mat = get_aug_matrix(vertices_uv, delta=0.01)#hyper
    transform_mat = tf.matmul(align_mat, aug_mat)
    
    X = tf.cast(X, tf.float32)
    x_sampled = sampling(X, transform_mat[:, :2], padded_image_shape[0], padded_image_shape[1])
    transformed_vertices_uv = transform_uv(vertices_uv, transform_mat)
    transformed_char_cxy_uv = transform_uv(char_cxy_uv, transform_mat)
    #image = tf.image.resize(image, padded_image_shape)    
    y_cx, y_cy, y_sx, y_sy, y_shear_x, y_shear_y = get_theta_from_coords(transformed_vertices_uv)
    y_align = tf.stack((y_cx, y_cy, y_sx, y_sy, y_shear_x, y_shear_y), -1)
    
    return x_sampled, y_align, plate_text, transformed_char_cxy_uv

In [None]:
class LabelEncoder:
    
    def __init__(self):
        pass    
    
    def _encode_sample(self, image_shape, y_align, gt_text, gt_cxy_uv):
        
        y_align = tf.reshape(y_align, [-1])
        gt_text = tf.reshape(gt_text, [-1])
        gt_cxy_uv_flat = tf.reshape(gt_cxy_uv, [-1])
        label = tf.concat([y_align, gt_text, gt_cxy_uv_flat], axis=-1)        
        return label
    
    def encode_batch(self, batch_images, y_align, gt_text, gt_cxy_uv):
        
        images_shape = tf.shape(batch_images)
        batch_size = images_shape[0]        
                            
        labels = tf.TensorArray(dtype=tf.float32, size=batch_size, dynamic_size=True)
        for i in range(batch_size):
            label = self._encode_sample(images_shape, y_align[i], gt_text[i], gt_cxy_uv[i])
            labels = labels.write(i, label)
        
        batch_images = tf.cast(batch_images, tf.float32)
        label = labels.stack()
        return batch_images, label      
        

In [None]:
from tensorflow import Tensor
from tensorflow.keras.layers import Input, Conv2D, ReLU, BatchNormalization,\
                                    Add, AveragePooling2D, Flatten, Dense, MaxPool2D
from tensorflow.keras.models import Model

In [None]:
regulizer = tf.keras.regularizers.L2(l1)

def residual_block(x: Tensor, downsample: bool, filters: int, kernel_size: int = 3) -> Tensor:
    y = Conv2D(kernel_size=kernel_size,
               strides= (1 if not downsample else 2),
               filters=filters, 
               activation=activation,
               padding="same",
               kernel_initializer=kernel_init,
               kernel_regularizer=regulizer)(x)    
    y = Conv2D(kernel_size=kernel_size,
               strides=1,
               filters=filters,
               padding="same",
               kernel_initializer=kernel_init,
               kernel_regularizer=regulizer)(y)   
    
    if downsample:
        x = Conv2D(kernel_size=3,
                   strides=2,
                   filters=filters,
                   activation=activation,
                   padding="same",
                   kernel_regularizer=regulizer)(x)
    out = Add()([x, y])
    out = ReLU()(out)
    return out

def create_resnet_backbone(inputs):
    
    #inputs = Input(shape=(None, None, 3))    
    num_filters = 64
    
    #t = BatchNormalization()(inputs)    
    t = Conv2D(kernel_size=7,
               strides=2,
               filters=num_filters,
               activation=activation,
               padding="same",
               kernel_initializer=kernel_init,
               kernel_regularizer=regulizer, 
               )(inputs)
    t = MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same')(t)
    
    num_blocks_list = [2, 2, 2]#resnet-18
    t_list = []
    for i in range(len(num_blocks_list)):
        num_blocks = num_blocks_list[i]
        for j in range(num_blocks):
            t = residual_block(t, downsample=(j==0 and i!=0), filters=num_filters)
        if i>=1:
            t_list.append(t)
        num_filters *= 2
    
        
    return t


def create_align_net(inputs):
    
    #inputs = Input(shape=(None, None, 3))    
    num_filters = 64
    
    #t = BatchNormalization()(inputs)    
    t = Conv2D(kernel_size=7,
               strides=2,
               filters=num_filters,
               activation=activation,
               padding="same",
               kernel_initializer=kernel_init,
               kernel_regularizer=regulizer, 
               )(inputs)
    t = MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='same')(t)
    
    num_blocks_list = [2, 2, 2, 2]#resnet-18
    t_list = []
    for i in range(len(num_blocks_list)):
        num_blocks = num_blocks_list[i]
        for j in range(num_blocks):
            t = residual_block(t, downsample=(j==0 and i!=0), filters=num_filters)
        if i>=1:
            t_list.append(t)
        num_filters *= 2
    
    t = Flatten()(t)
    t = Dense(256, activation=activation, kernel_regularizer=regulizer)(t)
    t = Dense(6, name='align')(t)
        
    return t

In [None]:
def position_encoding(max_sequence, seq, dimension_model):

    if max_sequence % 2 == 1:
        max_sequence += 1

    pos = tf.range(max_sequence, dtype=tf.float32)
    pos = tf.reshape(pos, [max_sequence, 1])
    theta = pos/tf.pow(10000.0, tf.range(dimension_model, dtype=tf.float32)/dimension_model)
    emb_even = tf.sin(theta)[:, ::2]
    emb_odd = tf.cos(theta)[:, 1::2]
    emb_even_odd = tf.stack((emb_even, emb_odd), axis=-1)
    emb_even_odd = tf.reshape(emb_even_odd, [max_sequence, dimension_model])
    emb = tf.expand_dims(emb_even_odd[:seq], 0)
    return emb

In [None]:
def masked_multi_head_attention(layers, y, num_units, s, head=head_n):
    '''
    :param y: (m, s, c) > padding >(m, S, c)
    :param x: == y
    :param num_units: 256
    :param head: 4
    :return: (m, s, c)
    '''

    z_expand = tf.expand_dims(y, axis=1)
    one = tf.ones_like(z_expand[:1, :, :, 0], dtype=tf.float32)
    one = tf.tile(one, [1, s, 1])
    #triangle_mask = tf.linalg.LinearOperatorLowerTriangular(one).to_dense()#disable on .pb?    
    hot = tf.one_hot(tf.range(s, dtype=tf.int64), s)#(s, s)
    acumulate_hot = []
    for i in range(s):
        row = hot[i]
        for j in range(i):
            row += hot[j]
        
        acumulate_hot.append(row)
    
    triangle_mask = tf.stack(acumulate_hot, axis=0)
    triangle_mask = tf.cast(triangle_mask, tf.float32)            
        
    QKV = layers[0](z_expand)
    #QKV = slim.fully_connected(z_expand, 3 * num_units, activation_fn=None, scope='fc_query_key_value')
    QKV = tf.tile(QKV, [1, s, 1, 1])
    QKV = tf.reshape(QKV, [-1, s, 3 * num_units])

    query, key, value = tf.split(QKV, 3, -1, name='masked_split')

    Q = tf.concat(tf.split(query, head, axis=-1), 0)
    K = tf.concat(tf.split(key, head, axis=-1), 0)
    V = tf.concat(tf.split(value, head, axis=-1), 0)

    relevant_score = tf.matmul(Q, tf.transpose(K, [0, 2, 1])) / np.sqrt(num_units)  # (m, q_s, c) * (m, c, k_s) = (m, q_s, k_s)
    
    relevant_score = tf.cast(relevant_score, tf.float32)
    triangle_mask = tf.cast(triangle_mask, tf.float32)
    relevant_score += -10000.0 * (1.0 - triangle_mask)        
    
    attention = tf.nn.softmax(relevant_score, axis=-1)
    context = tf.matmul(attention, V)

    attention_list = tf.split(attention, head, axis=0)
    context_list = tf.split(context, head, axis=0)

    attention = tf.reduce_mean(attention_list, axis=0)

    context = tf.concat(context_list, axis=-1)
    context_4d = tf.reshape(context, [-1, s, s, num_units])
    eye = tf.eye(s, s)
    eye = tf.reshape(eye, [1, s, s, 1])
    context = tf.reduce_sum(eye * context_4d, axis=2)
    
    if True:
        eye_sum = tf.reduce_sum(eye, axis=1)
        context = context_4d[:, -1]

    #context = slim.fully_connected(context, num_units, activation_fn=None, scope='Linear')    
    context = layers[1](context)
    return context, attention


In [None]:
def scale_dot_product_2d(query, key, value, s, h, w, num_units):
    print(s, 'Q', query.shape, 'K', key.shape, 'V', value.shape)
    #Q (None, 1, 32) K (None, 4, 12, 32) V (None, 4, 12, 32)
    #m = tf.shape(query)[0]    
    hw = h * w
    c = num_units
    #relevant_score = tf.einsum('msc,mhwc->mhws', query, key) / np.sqrt(num_units)
    query = tf.reshape(query, [-1, s, c])
    query_trans = tf.transpose(query, [0, 2, 1])
    key = tf.reshape(key, [-1, hw, c])
    relevant_score = tf.matmul(key, query_trans) / np.sqrt(num_units)
    
    relevant_score_flat = tf.reshape(relevant_score, [-1, hw, s])#err
    alignment_weights = tf.nn.softmax(relevant_score_flat, axis=1)

    #alignment_prob = tf.reshape(alignment_weights, tf.shape(relevant_score))
    alignment_prob = tf.reshape(alignment_weights, (-1, hw, s))
    value = tf.reshape(value, (-1, hw, c))

    #context = tf.einsum('mhws,mhwc->msc', alignment_prob, value)
    alignment_prob = tf.transpose(alignment_prob, [0, 2, 1])#(mhws) > (m, s, hw)
    #value = tf.transpose(value, [0, 2, 1])#(m,hw,c) > (m, c, hw)
    context = tf.matmul(alignment_prob, value, name='context_matmul')

    return context, alignment_prob

In [None]:
def multi_head_attention_qkv(layers, y, x, h, w, num_units, s, head=head_n):
    num_head_unit = num_units // head
    #query = slim.fully_connected(y, num_units, activation_fn=None, scope='query')
        
    #key_value = slim.conv2d(x, num_units * 2, kernel_size=[3, 3], scope='key_value')
    
    key = layers[0](x)
    value = layers[1](x)
    #key_value = slim.fully_connected(x, num_units * 2, activation_fn=None, scope='key_value')
    #key, value = tf.split(key_value, 2, -1)
    query = y
    Q = tf.concat(tf.split(query, head, axis=-1), 0)
    K = tf.concat(tf.split(key, head, axis=-1), 0)
    V = tf.concat(tf.split(value, head, axis=-1), 0)

    context, attention = scale_dot_product_2d(Q, K, V, s, h, w, num_head_unit)  # (m * head, s, c)

    attention_head = tf.split(attention, head, axis=0)
    attention_head = tf.stack(attention_head, axis=0)
    attention = tf.reduce_mean(attention_head, axis=0)
    attention = tf.reshape(attention, [-1, s, h, w])
    
    context_list = tf.split(context, head, axis=0)
    context = tf.concat(context_list, axis=-1)
    #context = tf.cond(tf.logical_and(env.is_ensemble_multi_head, is_train), lambda: dropout_sub_head(context_list, head), lambda: tf.concat(context_list, axis=-1))
    #context = slim.fully_connected(context, num_units, activation_fn=None, scope='linear')

    return context, attention

In [None]:
def attention_layer(layers, z, nets, h, w, s, num_units):
    #z (m, S, num_units)
    #x (m, h, w, ch)

    sub_z, attention_weight = masked_multi_head_attention(layers[:2], z, num_units, s)
    #z = layer_normalize(z + slim.dropout(sub_z, keep_prob=env.drop_prob_transformer, is_training=is_train))

    sub_z, attention = multi_head_attention_qkv(layers[2:], z, nets, h, w, num_units, s)
    #z = layer_normalize(z + slim.dropout(sub_z, keep_prob=env.drop_prob_transformer, is_training=is_train))

    #sub_z = position_wise_feed_forward(z, num_units)
    #z = layer_normalize(z + slim.dropout(sub_z, keep_prob=env.drop_prob_transformer, is_training=is_train))
    
    return sub_z, attention

In [None]:
def convert_net_to_score(net, embedding_w):
    embedding_w_trans = tf.transpose(embedding_w)
    score = tf.einsum('nsc,cv->nsv', net, embedding_w_trans)
    return score

In [None]:
def decodeNet(net, num_units, h, w):    
    #embedding = keras.layers.Embedding(input_dim=num_classes, output_dim=num_units, name='embedding')    
    embedding = Dense(num_units, use_bias=False, name='dense_embedding')
    y_symbol = tf.zeros_like(net[:, 0, 0, :1], dtype=tf.int32) + SOS
    
    dense_0 = Dense(3 * num_units, name='masked_Dense_0')
    dense_1 = Dense(num_units, name='masked_Dense_1')
    dense_2 = Dense(num_classes, name='dense_to_score')
    conv_0 = Conv2D(num_units, 3, padding='same', name='key')
    conv_1 = Conv2D(num_units, 3, padding='same', name='value')
    layers = [dense_0, dense_1, conv_0, conv_1]
    values = []
    y_symbol_hot = tf.one_hot(y_symbol, num_classes)
    for i in range(max_sequence):
        #z = tf.einsum('msv,vc->msc', y_symbol, embedding_w)
        
        z = embedding(y_symbol_hot)
        #print('embedding_in', i, y_symbol)#(None, 1, 118)
        #print('embedding_out', i, z)#(None, 1, 256)
        
        z = z * (num_units ** 0.5) + position_encoding(max_sequence + 1, i + 1, num_units)
        #z = slim.dropout(z, keep_prob=env.drop_prob_transformer, is_training=is_train)
                
        z, attention = attention_layer(layers, z, net, h, w, i+1, num_units)
        print('attention',i, attention)
        #value = convert_net_to_score(z, embedding_w)
        value = dense_2(z)
        last_value = value[:, -1:]
        values.append(last_value)
        
        y_symbol_hot = tf.concat((y_symbol_hot, tf.nn.softmax(last_value)), axis=1)#decode
        #last_cls = tf.argmax(last_value, -1)
        #last_cls = tf.cast(last_cls, tf.int32)
        #y_symbol = tf.concat((y_symbol, last_cls), axis=1)    
    
    value_stack = tf.stack(values, axis=-2)
    return value, attention

In [None]:
def coordinate_map_uv(h, w):
    #return (6, 18, 256)
    x = tf.range(0.5, w, 1) / tf.cast(w, tf.float32) * 2.0 -1
    y = tf.range(0.5, h, 1) / tf.cast(h, tf.float32) * 2.0 -1
    X, Y = tf.meshgrid(x, y)
    xy = tf.stack((X, Y), -1)
    xy = tf.expand_dims(xy, axis=0)   
    return xy
 

def convert_attention_to_coord(attention, h, w, attention_threshold=0.0):
    if attention_threshold > 0:
        attention = attention * tf.cast(attention > attention_threshold, tf.float32)
        attention = attention / tf.reduce_sum(attention, [1,2], keepdims=True)
    attention_exp = tf.expand_dims(attention, -1)
    coord_map = coordinate_map_uv(h, w)
    
    #before [?,9,4,12,1] * [1,4,12,1,2]. 
    #now    [?,9,4,12,1] * [1,1,4,12,2] > [?, 9, 4, 12, 2]
    attention_coord = attention_exp * tf.expand_dims(coord_map, 1)  # (?, 6, 18, 6, 2)
    char_coord = tf.reduce_sum(attention_coord, [2, 3])  # (?, 6, 2)
    return char_coord


In [None]:
padded_image_shape, np.array(padded_image_shape)/32

In [None]:
def createModel(num_classes):
        
    inputs = Input(shape=(padded_image_shape[0], padded_image_shape[1], 3))        
    inputs_f = normalilze_4d(inputs)
            
    resnet_stride = 16
    net_h = 6 
    net_w = net_h * 3
    image_h = net_h * resnet_stride
    image_w = net_w * resnet_stride
    
    h_align = create_align_net(inputs_f)    
    
    if True:
        cx, cy, sx, sy, shear_x, shear_y = tf.squeeze(tf.split(h_align, 6, -1), -1)
        aug_align_mat = generate_transform_matrix(cx, cy, sx, sy, shear_x, shear_y)
        inputs_aligned = sampling(inputs_f, aug_align_mat[:, :2], image_h, image_w)
            
    inputs_recog = tf.image.resize(inputs_f, (image_h, image_w)) + inputs_aligned*0.001 
    net = create_resnet_backbone(inputs_recog)
    print('resnet_out', net.shape)#(None, 4, 12, 512)
    print('net_h,net_w',net_h,net_w)
        
    scores, attention = decodeNet(net, 256, net_h, net_w)
    print('decodeNet_out', net)
    print('decodeNet_out', attention)
    
    char_cxy_uv = convert_attention_to_coord(attention, net_h, net_w, 0.001)
    print('h_char_cxy_uv', char_cxy_uv)
    char_cxy_uv = tf.reshape(char_cxy_uv, [-1, max_sequence, 2])
    scores = tf.reshape(scores, [-1, max_sequence, num_classes])
    
    output_decoder = tf.concat((char_cxy_uv, scores), -1)        
    output_decoder_flat = tf.reshape(output_decoder, [-1, max_sequence * (2 + num_classes)])
    output = tf.concat((h_align, output_decoder_flat), -1)
    print('final_output', output)
    
    model = keras.Model(inputs=inputs, outputs=output)    
    return model

In [None]:
class AlignLoss(tf.losses.Loss):
    """Wrapper to combine both the losses"""

    def __init__(self):
        super(AlignLoss, self).__init__(reduction="auto", name="AlignLoss")        
        pass

    def call(self, y_true, y_pred):
        loss = tf.reduce_sum(tf.square(y_true - y_pred), -1)
        return loss

class DecodeLoss(tf.losses.Loss):
    """Wrapper to combine both the losses"""

    def __init__(self, num_classes):
        super(DecodeLoss, self).__init__(reduction="auto", name="DecodeLoss")        
        self._num_classes = num_classes
        self._gamma = 2

    def call(self, y_decoder, h_decoder):
        h_decoder = tf.reshape(h_decoder, [-1, max_sequence, 2 + self._num_classes])
        
        y_text = tf.cast(y_decoder[:, :max_sequence], tf.int64)
        y_char_cxy = y_decoder[:, max_sequence:]
        y_char_cxy = tf.reshape(y_char_cxy, [-1, max_sequence, 2])
        
        h_char_cxy = h_decoder[:, :, :2]
        h_text_score = h_decoder[:, :, 2:]
        h_text = tf.argmax(h_text_score, -1)        
        
        valid_mask_f = tf.cast(y_text > -1, tf.float32)
        valid_mask_i = tf.cast(y_text > -1, tf.int64)
        
        text_correct = tf.cast(tf.equal(y_text, h_text), tf.int64) * valid_mask_i * tf.range(max_sequence, dtype=tf.int64)#(m, seq)
        text_correct_hot = tf.one_hot(text_correct, max_sequence)#(m,seq,seq)
        text_correct_hot_axis_1 = tf.reduce_sum(text_correct_hot, axis=1)
        loss_text_mask = tf.concat((tf.ones_like(text_correct_hot_axis_1[:, :1]), text_correct_hot_axis_1[:, :-1]), -1)
        loss_text_mask = tf.cast(loss_text_mask, tf.float32)
        
        weight_char = tf.range(max_sequence, dtype=tf.float32)[::-1]
        weight_char = tf.reshape(weight_char, [1, -1]) / (max_sequence/2)
        
        y_hot = tf.one_hot(y_text, self._num_classes)
        cls_pt = tf.nn.softmax(h_text_score)        
        cls_pt = tf.clip_by_value(cls_pt, 1e-7, 1.0 - 1e-7)
        loss_cls_p = - tf.pow(1.0 - cls_pt, self._gamma) * y_hot * tf.math.log(cls_pt)
        loss_cls_f = - tf.pow(cls_pt, self._gamma) * (1 - y_hot) * tf.math.log(1 - cls_pt)
        
        loss_cls = loss_text_mask * tf.reduce_sum(loss_cls_p + loss_cls_f, axis=-1)
        loss_cxy = tf.reduce_sum(tf.abs(y_char_cxy - h_char_cxy), -1)
        
        loss = tf.boolean_mask(loss_cxy + loss_cls, y_text > -1)
        #loss = valid_mask_f * (loss_cxy + 0.1 * loss_cls)
        return loss


class NetLoss(tf.losses.Loss):
    """Wrapper to combine both the losses"""

    def __init__(self, num_classes):
        super(NetLoss, self).__init__(reduction="auto", name="NetLoss")        
        self._alignLoss = AlignLoss()
        self._decodeLoss = DecodeLoss(num_classes)

    def call(self, y_true, y_pred):
        y_align = y_true[:, :6]
        y_decoder = y_true[:, 6:]
        
        h_align = y_pred[:, :6]
        h_decoder = y_pred[:, 6:]
        
        loss_align = self._alignLoss(y_align, h_align)
        loss_decode = self._decodeLoss(y_decoder, h_decoder)        
        loss = loss_align + loss_decode
        return loss

In [None]:
def distance(y_true, y_pred):
    y_align = y_true[:, :6]
    y_decoder = y_true[:, 6:]

    h_align = y_pred[:, :6]
    h_decoder = y_pred[:, 6:]
    
    return tf.reduce_mean(tf.abs(y_align - h_align))

def _acc(y_true, y_pred):    
    y_text = tf.cast(y_true[:, :max_sequence], tf.int64)
    y_char_cxy = y_true[:, max_sequence:]
    y_char_cxy = tf.reshape(y_char_cxy, [-1, max_sequence, 2])

    h_char_cxy = y_pred[:, :, :2]
    h_text_score = y_pred[:, :, 2:]
    h_text = tf.argmax(h_text_score, -1)

    valid_mask = y_text > -1    
    
    acc = tf.boolean_mask(tf.equal(y_text, h_text), valid_mask)    
    return acc

def accuracy(y_true, y_pred):
    y_align = y_true[:, :6]
    y_decoder = y_true[:, 6:]

    h_align = y_pred[:, :6]
    h_decoder = y_pred[:, 6:]
    
    h_decoder = tf.reshape(h_decoder, [-1, max_sequence, 2 + num_classes])      
    return _acc(y_decoder, h_decoder)


def dist_vertex(y_true, y_pred):
    y_align = y_true[:, :6]
    y_decoder = y_true[:, 6:]

    h_align = y_pred[:, :6]
    h_decoder = y_pred[:, 6:]
    
    y_text = tf.cast(y_decoder[:, :max_sequence], tf.int64)
    y_char_cxy = y_decoder[:, max_sequence:]
    y_char_cxy = tf.reshape(y_char_cxy, [-1, max_sequence, 2])
    
    h_decoder = tf.reshape(h_decoder, [-1, max_sequence, 2 + num_classes])        
    h_cxy = h_decoder[:,:,:2]
    
    valid_mask = y_text > -1    
    dist = tf.reduce_mean(tf.abs(y_char_cxy - h_cxy), -1)
    return tf.boolean_mask(dist, valid_mask)    

In [None]:
def load_weight():   
    weights_dir = path_weight
    #latest_checkpoint = tf.train.latest_checkpoint(weights_dir)
    latest_checkpoint = weights_dir 
    print('latest_checkpoint', latest_checkpoint)
    model.load_weights(weights_dir)

In [None]:
input_list_train = x_list
bbox_list_train = y_list
len(input_list_train), len(bbox_list_train), type(input_list_train),input_list_train[0].shape, bbox_list_train[0].shape

In [None]:
label_encoder = LabelEncoder()

In [None]:
def generator():
    m = len(input_list_train)
    
    for i in range(m):
        x = input_list_train[i]
        y_box = bbox_list_train[i]        
        yield (x, y_box)

dataset = tf.data.Dataset.from_generator(
    generator, 
    output_types=(tf.uint8, tf.float32), 
    output_shapes=(tf.TensorShape([None, None, 3]), tf.TensorShape([36])))

In [None]:
#dataset = tf.data.Dataset.from_tensor_slices((input_list_train, bbox_list_train))

In [None]:
np.set_printoptions(precision=2)
print(dataset)
for example in tfds.as_numpy(dataset):
    print('example', len(example))
    image = example[0]
    bbox = example[1]    
    print(image.dtype, bbox.dtype, image.shape, bbox.shape, bbox)
    break

In [None]:
batch_size = 1
autotune = tf.data.experimental.AUTOTUNE
#dataset = dataset.batch(batch_size)
train_dataset = dataset.map(preprocess_data, num_parallel_calls=autotune)
#train_dataset = train_dataset.batch(batch_size)
#train_dataset = train_dataset.padded_batch(batch_size=batch_size)
#train_dataset = train_dataset.padded_batch(batch_size=batch_size, padding_values=(0.0, 1e-8, -1), drop_remainder=True)
train_dataset = train_dataset.map(label_encoder.encode_batch, num_parallel_calls=autotune)
train_dataset = train_dataset.apply(tf.data.experimental.ignore_errors())
train_dataset = train_dataset.prefetch(autotune)


In [None]:
for x, y in train_dataset:
    print(y.shape)
    y = y[0]
    y_align = y[:6]
    y_recog = y[6:]
    y_recog_text = y_recog[:max_sequence]
    y_recog_cxy = y_recog[max_sequence:]
    print(y_align)
    print('y_recog_text', y_recog_text)
    print('y_recog_cxy', tf.reshape(y_recog_cxy, [-1, 2]))
    break

In [None]:
#optimizer = tf.optimizers.SGD(learning_rate=1e-5, clipvalue=10.)#warm up clipvalue=10. !
optimizer = tf.optimizers.SGD(learning_rate=1e-1, clipvalue=10.)#, clipvalue=10.
loss_fn = NetLoss(num_classes)

model = createModel(num_classes)
model.compile(loss=loss_fn , optimizer=optimizer, metrics=[distance, dist_vertex,accuracy])#[distance, accuracy]

callbacks_list = [
    tf.keras.callbacks.ModelCheckpoint(
        filepath=path_weight,
        monitor="loss",
        save_best_only=False,
        save_weights_only=True,
        verbose=0,
        save_freq=500
    )
]

In [None]:
model.load_weights(path_weight)

In [None]:
epochs = 10000
hist = model.fit(
    train_dataset.take(10000),
    validation_data=None,#val_dataset.take(2)
    epochs=epochs, 
    callbacks=callbacks_list,#callbacks_list
    verbose=1,
)# b1:64ms, b2:62ms, b5:62ms

In [None]:
model.save_weights(path_weight)

In [None]:
for image, label in train_dataset: 
    
    y_decode = label[:, 6:]
    y_cls = tf.cast(y_decode[:, :max_sequence], tf.int64)
    y_cxy = tf.reshape(y_decode[:, max_sequence:],[-1,max_sequence,2])
    output = model.predict(image)
    #print('output', output.shape)
    output_decode = tf.reshape(output[:, 6:], [-1, max_sequence, 2 + num_classes])
    cxy = output_decode[:, :, :2]
    cls_score = output_decode[:, :, 2:]
    #h_text_score = y_pred[:, :, 2:]
    #h_text = tf.argmax(h_text_score, -1)
    print('cls_score', cls_score.shape)
    cls = tf.argmax(cls_score, -1)    
    print('y', y_cls)
    print('h', cls)
    #print('y_cxy', y_cxy)
    #print('cxy',cxy)
    print('')
    break
    

In [None]:
plt.figure(figsize=(15,10))
plt.imshow(x_sample_concat)

### tflite

In [None]:
# Convert the model.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

In [None]:
# Save the TF Lite model.
with tf.io.gfile.GFile('model.tflite', 'wb') as f:
  f.write(tflite_model)
!ls *.tflite

In [None]:
input_sample = input_list_train[0]
input_sample.shape

In [None]:
img = Image.fromarray(input_sample[52:-52,255:-255])
img

In [None]:
img_resized = img.resize((padded_image_shape[1],padded_image_shape[0]))
img_resized

In [None]:
input_sample = np.expand_dims(np.array(img_resized), 0)

In [None]:
num_classes, num_classes * max_sequence, (2+num_classes) * max_sequence, 6+ (2+num_classes) * max_sequence

In [None]:
# Load TFLite model and allocate tensors.
interpreter = tf.lite.Interpreter(model_content=tflite_model)
interpreter.allocate_tensors()

# Get input and output tensors.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
print('input_details', input_details)
print('output_details', output_details)

# Test the TensorFlow Lite model on random input data.
input_shape = input_details[0]['shape']
input_data = np.array(input_sample, dtype=np.float32)
interpreter.set_tensor(input_details[0]['index'], input_data)

interpreter.invoke()

# The function `get_tensor()` returns a copy of the tensor data.
# Use `tensor()` in order to get a pointer to the tensor.
tflite_results = interpreter.get_tensor(output_details[0]['index'])
tflite_results.shape

In [None]:
out_align = tflite_results[0, :6]
out_recog = tflite_results[0, 6:]
out_recog = np.reshape(out_recog, [2+num_classes, max_sequence])
out_recog_cx = out_recog[:2]
out_recog_text = out_recog[2:]
cls_arg_max = np.argmax(out_recog_text, 0)
tflite_results.shape, cls_arg_max

In [None]:
for var in model.trainable_variables:
    print(var.name, var.shape)