In [None]:

import tensorflow as tf

import numpy as np

# HTR Dependency
import cv2
import string
import h5py

# Short alias so later cells can write `keras.X` instead of `tf.keras.X`.
keras = tf.keras
# Print the installed TensorFlow version (the next cell records the one originally used).
print(tf.__version__)



In [None]:
#2.1.0-dev20191224

In [None]:
!nvidia-smi

In [None]:
import html
import re

"""
DeepSpell based text cleaning process.
    Tal Weiss.
    Deep Spelling.
    Medium: https://machinelearnings.co/deep-spelling-9ffef96a24f6#.2c9pu8nlm
    Github: https://github.com/MajorTal/DeepSpell
"""

# NOTE: `string` is not imported in this cell; it comes from the first cell of the notebook.

# Dash / minus look-alikes; text_standardize() replaces each match with a plain "-".
RE_DASH_FILTER = re.compile(r'[\-\˗\֊\‐\‑\‒\–\—\⁻\₋\−\﹣\－]', re.UNICODE)
# The "&#39;" entity plus apostrophe/quote/accent look-alikes; replaced with "'".
# The nine `{}` placeholders are filled with the characters built by the chr() calls below.
RE_APOSTROPHE_FILTER = re.compile(r'&#39;|[ʼ՚＇‘’‛❛❜ߴߵ`‵´ˊˋ{}{}{}{}{}{}{}{}{}]'.format(
    chr(768), chr(769), chr(832), chr(833), chr(2387),
    chr(5151), chr(5152), chr(65344), chr(8242)), re.UNICODE)
# Characters removed outright; "¶" and "¤" are the PAD/UNK tokens used by Tokenizer.
RE_RESERVED_CHAR_FILTER = re.compile(r'[¶¤«»]', re.UNICODE)
# Every opening bracket variant (including "[" and "{") is replaced with "(".
RE_LEFT_PARENTH_FILTER = re.compile(r'[\(\[\{\⁽\₍\❨\❪\﹙\（]', re.UNICODE)
# Every closing bracket variant (including "]" and "}") is replaced with ")".
RE_RIGHT_PARENTH_FILTER = re.compile(r'[\)\]\}\⁾\₎\❩\❫\﹚\）]', re.UNICODE)
# Matches any character that is neither a word character, whitespace, nor in string.punctuation.
RE_BASIC_CLEANER = re.compile(r'[^\w\s{}]'.format(re.escape(string.punctuation)), re.UNICODE)

# Characters stripped from the start of the text (passed to str.lstrip).
LEFT_PUNCTUATION_FILTER = """!%&),.:;<=>?@\\]^_`|}~"""
# Characters stripped from the end of the text (passed to str.rstrip).
RIGHT_PUNCTUATION_FILTER = """"(/<=>@[\\^_`{|~"""
# One or more whitespace characters other than a newline; collapsed to a single space.
NORMALIZE_WHITESPACE_REGEX = re.compile(r'[^\S\n]+', re.UNICODE)

def text_standardize(text):
    """Normalize punctuation variants and put spaces around punctuation marks.

    Args:
        text: string to clean, or None.

    Returns:
        The cleaned string; "" when `text` is None.
    """
    if text is None:
        return ""

    cleaned = html.unescape(text)
    # These are the two-character sequences backslash+n / backslash+t, not real control characters.
    for literal_escape in ("\\n", "\\t"):
        cleaned = cleaned.replace(literal_escape, "")

    # Applied in this exact order: reserved characters are dropped first, look-alikes are
    # folded onto their ASCII form, and only then is everything else unknown removed.
    substitutions = (
        (RE_RESERVED_CHAR_FILTER, ""),
        (RE_DASH_FILTER, "-"),
        (RE_APOSTROPHE_FILTER, "'"),
        (RE_LEFT_PARENTH_FILTER, "("),
        (RE_RIGHT_PARENTH_FILTER, ")"),
        (RE_BASIC_CLEANER, ""),
    )
    for pattern, replacement in substitutions:
        cleaned = pattern.sub(replacement, cleaned)

    cleaned = cleaned.lstrip(LEFT_PUNCTUATION_FILTER).rstrip(RIGHT_PUNCTUATION_FILTER)

    # Surround every punctuation mark with spaces, then collapse the resulting runs.
    spaced_marks = str.maketrans({mark: f" {mark} " for mark in string.punctuation})
    cleaned = cleaned.translate(spaced_marks)

    return NORMALIZE_WHITESPACE_REGEX.sub(" ", cleaned.strip())

"""
Sauvola binarization based on:
    J. Sauvola, T. Seppanen, S. Haapakoski, M. Pietikainen,
    Adaptive Document Binarization, in IEEE Computer Society Washington, 1997.
"""

def sauvola(img, window, thresh, k):
    """Binarize a 2-D image with a locally adaptive (Sauvola-style) threshold.

    Args:
        img: 2-D array (its shape is unpacked as rows, cols).
        window: indexable pair; window[0] offsets rows and window[1] offsets columns.
        thresh: divisor applied to the local deviation term.
        k: weight of the deviation term.

    Returns:
        uint8 array shaped like `img`: 255 where the pixel is >= its local threshold, else 0.
    """
    rows, cols = img.shape
    win_r, win_c = window[0], window[1]

    # The border width is derived from window[0] only and used on all four sides.
    border = int(np.floor(win_r / 2))
    padded = cv2.copyMakeBorder(img, border, border, border, border, cv2.BORDER_CONSTANT)
    sum2, sqsum = cv2.integral2(padded)

    def window_total(table):
        """Sum over each pixel's window using four corner look-ups in an integral table."""
        lower_right = table[win_r:rows + win_r, win_c:cols + win_c]
        upper_left = table[0:rows, 0:cols]
        lower_left = table[win_r:rows + win_r, 0:cols]
        upper_right = table[0:rows, win_c:cols + win_c]
        return lower_right + upper_left - lower_left - upper_right

    isum = window_total(sum2)
    isqsum = window_total(sqsum)

    ksize = win_r * win_c
    mean = isum / ksize
    # NOTE(review): this is not the textbook sqrt(E[x^2] - E[x]^2); it is kept exactly as written.
    std = (((isqsum / ksize) - (mean**2) / ksize) / ksize) ** 0.5
    # The (mean >= 100) mask zeroes the threshold in dark neighbourhoods, so every pixel
    # there compares >= 0 and comes out as 255.
    threshold = (mean * (1 + k * (std / thresh - 1))) * (mean >= 100)

    return np.asarray(255 * (img >= threshold), 'uint8')

def remove_cursive_style(img):
    """Deslant a grayscale image by picking the best of nine horizontal shears.

    The image is binarized, sheared with each candidate factor, and scored; the factor
    with the highest score is then applied to the original image.

    Args:
        img: 2-D grayscale array (its shape is unpacked as rows, cols).

    Returns:
        The sheared image resized back to the input's (rows, cols).
    """

    def calc_y_alpha(vec):
        """Score one column: squared run length if its non-zero pixels are contiguous, else 0."""
        indices = np.where(vec > 0)[0]
        h_alpha = len(indices)

        if h_alpha > 0:
            delta_y_alpha = indices[h_alpha - 1] - indices[0] + 1

            # Equal counts mean there is no gap between the first and last non-zero pixel.
            if h_alpha == delta_y_alpha:
                return h_alpha * h_alpha
        return 0

    alpha_vals = [-1.0, -0.75, -0.5, -0.25, 0.0, 0.25, 0.5, 0.75, 1.0]
    rows, cols = img.shape
    results = []

    # Use Otsu when its threshold lands below 127, otherwise fall back to Sauvola.
    ret, otsu = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    binary = otsu if ret < 127 else sauvola(img, (int(img.shape[0] / 2), int(img.shape[0] / 2)), 127, 1e-2)

    for alpha in alpha_vals:
        # Negative shears move content left, so shift right to keep it inside the canvas.
        shift_x = max(-alpha * rows, 0.)
        size = (cols + int(np.ceil(abs(alpha * rows))), rows)
        # np.float was removed in NumPy 1.24; np.float64 is the dtype it used to alias.
        transform = np.asarray([[1, alpha, shift_x], [0, 1, 0]], dtype=np.float64)

        # NOTE(review): the 4th positional parameter of cv2.warpAffine is `dst`, so this
        # constant is probably not acting as the interpolation flag. Left untouched so the
        # preprocessing stays identical to what produced the training data -- confirm
        # before switching to `flags=cv2.INTER_NEAREST`.
        shear_img = cv2.warpAffine(binary, transform, size, cv2.INTER_NEAREST)
        score = np.sum(np.apply_along_axis(calc_y_alpha, 0, shear_img))
        results.append([score, size, transform])

    # max() returns the first best entry, matching a stable descending sort followed by [0].
    _, best_size, best_transform = max(results, key=lambda entry: entry[0])
    warp = cv2.warpAffine(img, best_transform, best_size, borderValue=255)

    return cv2.resize(warp, dsize=(cols, rows))

def preproc(img, input_size):
    """Load, resize, deslant and pad an image to the model's input size.

    Args:
        img: one of
            * a path string, read as grayscale;
            * a tuple `(path, boundbox)` where boundbox is `[row0, row1, col0, col1]`;
              float entries are treated as fractions of the image height (first two)
              or width (last two);
            * an already loaded 2-D array.
        input_size: `(width, height, channels)`; only width and height are used.

    Returns:
        A uint8 array of shape (width, height): the image is placed in the top-left corner
        of a white canvas and transposed. Returns None (after printing an error) when the
        image cannot be read, is empty, or is not 2-D.
    """
    img_src = img

    if isinstance(img, str):
        img = cv2.imread(img, cv2.IMREAD_GRAYSCALE)

    if isinstance(img, tuple):
        image, boundbox = img
        img = cv2.imread(image, cv2.IMREAD_GRAYSCALE)

        # cv2.imread returns None for unreadable files; report it like the shape check below.
        if img is None:
            print(f"ERROR: {img_src}")
            return

        # Build a new list so the caller's boundbox is not modified (and may be a tuple).
        box = [
            int((len(img) if i < 2 else len(img[0])) * value) if isinstance(value, float) else value
            for i, value in enumerate(boundbox)
        ]

        img = np.asarray(img[box[0]:box[1], box[2]:box[3]], dtype=np.uint8)

    wt, ht, _ = input_size
    try:
        h, w = np.asarray(img).shape
    except Exception:
        # Reached when the image failed to load (None) or is not a 2-D array.
        print(f"ERROR: {img_src}")
        return

    # An empty image (for example an empty crop) cannot be resized.
    if h == 0 or w == 0:
        print(f"ERROR: {img_src}")
        return

    # Scale by the larger ratio so the image fits inside (wt, ht) without distortion.
    f = max((w / wt), (h / ht))

    new_size = (max(min(wt, int(w / f)), 1), max(min(ht, int(h / f)), 1))
    img = cv2.resize(img, new_size)

    _, binary = cv2.threshold(img, 254, 255, cv2.THRESH_BINARY)

    # Only compensate when the pure-white pixels account for less than 80% of total intensity.
    if np.sum(img) * 0.8 > np.sum(binary):
        # NOTE(review): illumination_compensation is not defined anywhere in this notebook;
        # this branch raises NameError unless it is defined or imported elsewhere.
        img = illumination_compensation(img)

    img = remove_cursive_style(img)

    target = np.ones([ht, wt], dtype=np.uint8) * 255
    target[0:new_size[1], 0:new_size[0]] = img
    img = cv2.transpose(target)

    return img

def normalization(imgs):
    """Standardize each image of a batch to zero mean and, when possible, unit deviation.

    Args:
        imgs: sequence or array of equally sized 2-D images.

    Returns:
        float32 array of shape (n, h, w, 1).
    """
    batch = np.asarray(imgs).astype(np.float32)
    # Unpacking doubles as a rank check: anything that is not 3-D raises ValueError here.
    _count, _height, _width = batch.shape

    for sample in batch:
        # `sample` is a view into `batch`, so the in-place updates below modify the batch.
        mean, deviation = cv2.meanStdDev(sample)
        mu, sigma = mean[0][0], deviation[0][0]

        sample -= mu
        # Constant images have zero deviation; they are only centred.
        if sigma > 0:
            sample /= sigma

    return np.expand_dims(batch, axis=-1)


def augmentation(imgs,
                 rotation_range=0,
                 scale_range=0,
                 height_shift_range=0,
                 width_shift_range=0,
                 dilate_range=1,
                 erode_range=1):
    """Randomly shift, rotate, scale, erode and dilate a batch of images.

    The random parameters are drawn once per call, so every image of the batch receives
    the same transformation.

    Args:
        imgs: array of shape (n, h, w).
        rotation_range: rotation is drawn uniformly from [-rotation_range, rotation_range].
        scale_range: scale is drawn uniformly from [1 - scale_range, 1].
        height_shift_range: vertical shift, as a fraction of the height, in [-range, range].
        width_shift_range: horizontal shift, as a fraction of the width, in [-range, range].
        dilate_range: upper bound for the randomly drawn dilation kernel length.
        erode_range: upper bound for the randomly drawn erosion kernel length.

    Returns:
        float32 array with the same shape as `imgs`.
    """
    batch = imgs.astype(np.float32)
    _, height, width = batch.shape

    # Keep this draw order: it determines which random numbers each parameter receives.
    dilate_kernel = np.ones((int(np.random.uniform(1, dilate_range)),), np.uint8)
    erode_kernel = np.ones((int(np.random.uniform(1, erode_range)),), np.uint8)
    height_shift = np.random.uniform(-height_shift_range, height_shift_range)
    rotation = np.random.uniform(-rotation_range, rotation_range)
    scale = np.random.uniform(1 - scale_range, 1)
    width_shift = np.random.uniform(-width_shift_range, width_shift_range)

    homogeneous_row = [[0, 0, 1]]
    shift_2x3 = np.float32([[1, 0, width_shift * width], [0, 1, height_shift * height]])
    rotate_2x3 = cv2.getRotationMatrix2D((width // 2, height // 2), rotation, scale)

    # Promote both 2x3 maps to 3x3 so they can be multiplied, then drop the last row again.
    shift_3x3 = np.r_[shift_2x3, homogeneous_row]
    rotate_3x3 = np.r_[rotate_2x3, homogeneous_row]
    affine_mat = rotate_3x3.dot(shift_3x3)[:2, :]

    for idx, sample in enumerate(batch):
        warped = cv2.warpAffine(sample, affine_mat, (width, height),
                                flags=cv2.INTER_NEAREST, borderValue=255)
        eroded = cv2.erode(warped, erode_kernel, iterations=1)
        batch[idx] = cv2.dilate(eroded, dilate_kernel, iterations=1)

    return batch

In [None]:
"""
Uses generator functions to supply train/test with data.
Image renderings and text are created on the fly each time.
"""

from tensorflow.keras.preprocessing.sequence import pad_sequences

import h5py
import numpy as np
import unicodedata


class DataGenerator():
    """Stream batches from the partitions of one HDF5 dataset file.

    For every partition the file is read as `f[partition]['dt']` (the samples handed to
    augmentation/normalization) and `f[partition]['gt']` (byte strings decoded to text).
    Everything is copied into memory in the constructor.

    Attributes:
        tokenizer: Tokenizer built from the given charset and maximum text length.
        batch_size: number of samples per batch (the last batch of a pass may be smaller).
        partitions: ['test'] in predict mode, otherwise ['train', 'valid', 'test'].
        size / steps / index / dataset: per-partition sample count, batches per pass,
            read cursor, and loaded data.
    """

    def __init__(self, source, batch_size, charset, max_text_length, predict=False):
        self.tokenizer = Tokenizer(charset, max_text_length)
        self.batch_size = batch_size
        self.partitions = ['test'] if predict else ['train', 'valid', 'test']

        self.size, self.steps, self.index, self.dataset = {}, {}, {}, {}

        # Copy each partition out of the file so it can be closed immediately.
        with h5py.File(source, "r") as f:
            for pt in self.partitions:
                self.dataset[pt] = {'dt': f[pt]['dt'][:], 'gt': f[pt]['gt'][:]}

        for pt in self.partitions:
            part = self.dataset[pt]

            # decode sentences from bytes
            part['gt'] = [raw.decode() for raw in part['gt']]

            # set size and steps
            self.size[pt] = len(part['gt'])
            self.steps[pt] = int(np.ceil(self.size[pt] / self.batch_size))
            self.index[pt] = 0

    def _take_window(self, pt):
        """Return the (start, stop) sample range of the next batch of `pt` and advance its cursor."""
        start = self.index[pt]
        stop = start + self.batch_size
        self.index[pt] = stop
        return start, stop

    def _encode_labels(self, sentences):
        """Tokenize sentences and right-pad them to the tokenizer's maximum length."""
        tokens = [self.tokenizer.encode(sentence) for sentence in sentences]
        return pad_sequences(tokens, maxlen=self.tokenizer.maxlen, padding="post")

    def next_train_batch(self):
        """Endlessly yield `(x, y, [])` training batches, restarting when the data runs out.

        Samples are augmented and then normalized. The third element is always an empty
        list -- NOTE(review): presumably a placeholder expected by Keras; confirm.
        """
        while True:
            if self.index['train'] >= self.size['train']:
                self.index['train'] = 0

            start, stop = self._take_window('train')
            train = self.dataset['train']

            x_train = augmentation(train['dt'][start:stop],
                                   rotation_range=1.5,
                                   scale_range=0.05,
                                   height_shift_range=0.025,
                                   width_shift_range=0.05,
                                   erode_range=5,
                                   dilate_range=3)
            x_train = normalization(x_train)
            y_train = self._encode_labels(train['gt'][start:stop])

            yield (x_train, y_train, [])

    def next_valid_batch(self):
        """Endlessly yield `(x, y, [])` validation batches (normalized, never augmented)."""
        while True:
            if self.index['valid'] >= self.size['valid']:
                self.index['valid'] = 0

            start, stop = self._take_window('valid')
            valid = self.dataset['valid']

            x_valid = normalization(valid['dt'][start:stop])
            y_valid = self._encode_labels(valid['gt'][start:stop])

            yield (x_valid, y_valid, [])

    def next_test_batch(self):
        """Yield normalized test batches for one pass, then reset the cursor and stop."""
        while self.index['test'] < self.size['test']:
            start, stop = self._take_window('test')
            yield normalization(self.dataset['test']['dt'][start:stop])

        self.index['test'] = 0

In [None]:
class Tokenizer():
    """Convert between text and integer ids over a fixed character set.

    The vocabulary is the PAD token, the UNK token and then the given characters, in that
    order, so PAD has id 0 and UNK has id 1.
    """

    def __init__(self, chars, max_text_length=128):
        self.PAD_TK, self.UNK_TK = "¶", "¤"
        self.chars = self.PAD_TK + self.UNK_TK + chars

        self.PAD = self.chars.find(self.PAD_TK)
        self.UNK = self.chars.find(self.UNK_TK)

        self.vocab_size = len(self.chars)
        self.maxlen = max_text_length

    def encode(self, text):
        """Return the ids of `text` as a NumPy array.

        Accents are stripped (NFKD, non-ASCII dropped), whitespace runs are collapsed to a
        single space, and characters outside the vocabulary map to the UNK id.
        """
        ascii_text = unicodedata.normalize("NFKD", text).encode("ASCII", "ignore").decode("ASCII")
        collapsed = " ".join(ascii_text.split())

        positions = (self.chars.find(symbol) for symbol in collapsed)
        return np.asarray([self.UNK if pos == -1 else pos for pos in positions])

    def decode(self, text):
        """Return the cleaned string for a sequence of ids; negative ids are skipped."""
        symbols = [self.chars[int(idx)] for idx in text if idx > -1]
        without_pad = self.remove_tokens("".join(symbols))

        return text_standardize(without_pad)

    def remove_tokens(self, text):
        """Return `text` with every PAD token removed."""
        return text.replace(self.PAD_TK, "")

In [None]:
%cd '/home/kuadmin01/terng/'
!ls

In [None]:
# Character set the recognizer can emit: digits, ASCII letters, a few marks and the space.
SPACE = " "
SPECIAL_CHARS = "?!,."
# Same 62 characters as string.printable[:62] (digits, then lower-case, then upper-case).
ALPHANUMERIC = string.digits + string.ascii_letters
CHARS = ALPHANUMERIC + SPECIAL_CHARS + SPACE

# Name used for checkpoint, log and export file names.
INPUT_SOURCE_NAME = "iam_word"
BATCH_SIZE = 16
MAX_TEXT_LENGTH = 32
CHARSET_BASE = CHARS

# Loads the train/valid/test partitions into memory (predict=False).
dtgen = DataGenerator(
    source="/home/kuadmin01/terng/Dataset/dataset_for_experiment.hdf5",
    batch_size=BATCH_SIZE,
    charset=CHARSET_BASE,
    max_text_length=MAX_TEXT_LENGTH,
    predict=False,
)

In [None]:
def ctc_loss(y_true, y_pred):
    """Mean CTC loss over a batch.

    Args:
        y_true: label ids; tensors with more than two dimensions are squeezed first.
        y_pred: model output for the batch.

    Returns:
        Scalar tensor: the mean of the per-sample CTC costs.
    """
    labels = tf.squeeze(y_true) if len(y_true.shape) > 2 else y_true

    # Summing y_pred over the last axis and then over the remaining axis gives one value
    # per sample -- presumably the number of time steps, assuming each step's outputs
    # sum to 1 (softmax); confirm against the model.
    per_step_total = tf.math.reduce_sum(y_pred, axis=-1, keepdims=False)
    input_length = tf.math.reduce_sum(per_step_total, axis=-1, keepdims=True)

    # Non-zero entries are counted as label characters (0 is the padding value).
    label_length = tf.math.count_nonzero(labels, axis=-1, keepdims=True, dtype="int64")

    per_sample_cost = keras.backend.ctc_batch_cost(labels, y_pred, input_length, label_length)
    return tf.reduce_mean(per_sample_cost)

In [None]:
from Models.HTR_Models import FlorHTR, SmallFlorHTR, PuigCerver

In [None]:
# Shape given to the model builder; preproc() unpacks the same tuple as (width, height, channels).
INPUT_SHAPE = (1024, 128, 1)
# One output class per tokenizer symbol plus one extra -- presumably the CTC blank; confirm.
OUTPUT_SHAPE = dtgen.tokenizer.vocab_size + 1

# FlorHTR returns the graph's input and output tensors; wrap them in a Keras model.
inputs, outputs = FlorHTR(input_shape=INPUT_SHAPE, output_shape=OUTPUT_SHAPE)
model = keras.Model(inputs=inputs, outputs=outputs)
# Train with RMSprop and the CTC loss defined above.
model.compile(optimizer=keras.optimizers.RMSprop(learning_rate=5e-4), loss=ctc_loss)
model.summary()

In [None]:
from tensorflow.keras.callbacks import CSVLogger, TensorBoard, ModelCheckpoint
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
import datetime




def Callback(source, model_name):
    """Build the list of Keras callbacks used for training.

    All paths are relative, so files land under the working directory that is current
    when Keras writes them.

    Args:
        source: dataset name, used as the prefix of checkpoint and log file names.
        model_name: model name, used as the sub-directory under `target/` and `log/`.

    Returns:
        [ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger, TensorBoard].
    """
    source, model_name = str(source), str(model_name)

    # Each call gets its own timestamped TensorBoard directory.
    run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    log_dir = "logs/fit/" + run_stamp

    # Keep only the weights of the epoch with the lowest validation loss.
    checkpoint = ModelCheckpoint(
        filepath=f"target/{model_name}/{source}_checkpoint_weights.hdf5",
        monitor="val_loss",
        save_best_only=True,
        save_weights_only=True,
        verbose=True,
    )
    # Stop after 20 epochs without improvement and restore the best weights.
    early_stopping = EarlyStopping(
        monitor="val_loss",
        min_delta=1e-8,
        patience=20,
        restore_best_weights=True,
        verbose=True,
    )
    # Multiply the learning rate by 0.2 after 15 epochs without improvement.
    reduce_lr = ReduceLROnPlateau(
        monitor="val_loss",
        min_delta=1e-8,
        factor=0.2,
        patience=15,
        verbose=True,
    )
    # Append per-epoch metrics to a ";"-separated log file.
    csv_logger = CSVLogger(
        filename=f"log/{model_name}/{source}_epochs.log",
        separator=";",
        append=True,
    )
    tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

    return [checkpoint, early_stopping, reduce_lr, csv_logger, tensorboard_callback]


In [None]:
# Checkpoints go to target/Flor/ and CSV logs to log/Flor/, both prefixed with the dataset name.
callbacks = Callback(INPUT_SOURCE_NAME, 'Flor')

In [None]:
# model.load_weights(f"/home/kuadmin01/terng/SeniorProjectMaterial/target/Flor/iam_word_checkpoint_weights.hdf5")

In [None]:
%cd SeniorProjectMaterial/
EPOCHS = 10
history = model.fit(x=dtgen.next_train_batch(),
            epochs=EPOCHS,
            steps_per_epoch=dtgen.steps['train'],
            validation_data=dtgen.next_valid_batch(),
            validation_steps=dtgen.steps['valid'],
            callbacks=callbacks,
            shuffle=True,
            verbose=1
          )

In [None]:
# !rm -rf ./logs/ 
%load_ext tensorboard

In [3]:
%reload_ext tensorboard

In [6]:
%tensorboard --logdir logs/Terng_HTR3/

Reusing TensorBoard on port 6010 (pid 109167), started 0:04:24 ago. (Use '!kill 109167' to kill it.)

In [7]:

%tensorboard --logdir logs/Flor/fit

Reusing TensorBoard on port 6008 (pid 47757), started 5 days, 20:15:44 ago. (Use '!kill 47757' to kill it.)

In [None]:
!cd SeniorProjectMaterial/


In [None]:
# Create the export directory if needed, then save the whole model; the TFLite
# conversion cell below reads it back from the same path.
!mkdir -p saved_model
model.save(f"saved_model/Flor/{INPUT_SOURCE_NAME}_htr")

In [None]:
# Convert the model exported by the previous cell to a TFLite flatbuffer.
converter = tf.lite.TFLiteConverter.from_saved_model(f"saved_model/Flor/{INPUT_SOURCE_NAME}_htr")

# converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS,
#                                        tf.lite.OpsSet.SELECT_TF_OPS]

converter.experimental_new_converter = True
tflite_model = converter.convert()

# Write through a context manager so the file is flushed and closed deterministically.
tflite_path = f"{INPUT_SOURCE_NAME}_Flor_htr.tflite"
with open(tflite_path, "wb") as tflite_file:
    tflite_file.write(tflite_model)

In [None]:
%cd SeniorProjectMaterial/

In [None]:
PREDICT_IMAGE_SRC = "hello.png"
tokenizer = Tokenizer(chars=CHARS, max_text_length=MAX_TEXT_LENGTH)

# preproc() prints an error and returns None when the image cannot be read or is not
# 2-D; stop here with a clear message instead of failing inside normalization().
img = preproc(PREDICT_IMAGE_SRC, input_size=INPUT_SHAPE)
if img is None:
    raise ValueError(f"Could not load a grayscale image from {PREDICT_IMAGE_SRC!r}")
x_test = normalization([img])

STEPS = 1

# max_queue_size / workers / use_multiprocessing were passed with their default values
# and are no longer accepted by newer Keras releases, so they are left out.
out = model.predict(
    x=x_test,
    batch_size=None,
    verbose=False,
    steps=STEPS,
    callbacks=None,
)




In [None]:
# Decode the network output `out` into candidate id sequences, one batch per step.
steps_done = 0
batch_size = int(np.ceil(len(out) / STEPS))
# Number of time steps of the longest sample in `out`.
input_length = len(max(out, key=len))
predicts, probabilities = [], []

while steps_done < STEPS:
    index = steps_done * batch_size
    until = index + batch_size

    x_test = np.asarray(out[index:until])
    x_test_len = np.asarray([input_length for _ in range(len(x_test))])

    # With greedy=True Keras returns only the best path; beam_width and top_paths
    # are used only when greedy=False.
    decode, log = keras.backend.ctc_decode(
                  x_test,
                  x_test_len,
                  greedy=True,
                  beam_width=10,
                  top_paths=3
                )

    probabilities.extend([np.exp(x) for x in log])
    # Drop the -1 padding that ctc_decode appends to shorter sequences.
    decode = [[[int(p) for p in x if p != -1] for x in y] for y in decode]
    # Regroup [path][sample] as [sample][path]. zip() is used instead of np.swapaxes
    # because the sequences have different lengths, and NumPy >= 1.24 refuses to build
    # an array from ragged nested lists.
    predicts.extend([list(paths) for paths in zip(*decode)])
    # update step
    steps_done += 1

In [None]:
#Test
steps_done = 0
batch_size = int(np.ceil(len(out) / STEPS))
input_length = len(max(out, key=len))
predicts, probabilities = [], []

while steps_done < STEPS:
    index = steps_done * batch_size
    until = index + batch_size

    x_test = np.asarray(out[index:until])
    x_test_len = np.asarray([input_length for _ in range(len(x_test))])

    decode, log = tf.nn.ctc_beam_search_decoder(
                  x_test,
                  128
                )

    probabilities.extend([np.exp(x) for x in log])
    decode = [[[int(p) for p in x if p != -1] for x in y] for y in decode]
    predicts.extend(np.swapaxes(decode, 0, 1))
    # update step
    steps_done += 1

In [None]:
# Turn every candidate id sequence into text. NOTE: this overwrites `predicts`, so
# re-running this cell without re-running a decoding cell first passes strings to
# tokenizer.decode.
predicts = [[tokenizer.decode(x) for x in y] for y in predicts]

In [None]:
# Print every candidate transcription next to its probability, one group per sample.
print("\n####################################")
for candidates, scores in zip(predicts, probabilities):
  print("\nProb.  - Predict")
  for candidate, score in zip(candidates, scores):
    print(f"{score:.4f} - {candidate}")
print("\n####################################")

In [None]:
!kill 141931