In [10]:
import pickle
import tensorflow as tf
import numpy as np

import matplotlib.pyplot as plt
from tensorflow.keras.layers import (Input, Conv2D, MaxPooling2D, Reshape, Dense, Bidirectional, Dropout, LSTM 
,StringLookup, ConvLSTM2D)
from tensorflow.keras import Model
from tensorflow.keras.backend import ctc_decode, ctc_batch_cost
from tensorflow.keras.layers import Layer

import Levenshtein as lev

In [None]:
# Load the pickled label tensor that the loss and evaluation cells read.
# NOTE: pickle.load executes arbitrary code from the file; only open trusted pickles.
with open('./labeltensor_shpad.pkl', 'rb') as label_file:
    lab_tensor_sh = pickle.load(label_file)

In [None]:
# Load the pickled image tensor that is fed to the model.
# NOTE: pickle.load executes arbitrary code from the file; only open trusted pickles.
with open('./imgtensor_trsh.pkl', 'rb') as image_file:
    img_tensor_trsh = pickle.load(image_file)

In [2]:
# Character vocabulary. The decoding code looks characters up as charlist[index - 1],
# i.e. class index 1 is 'A' and class index len(charlist) is '0'.
charlist = (
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'   # upper-case letters
    'abcdefghijklmnopqrstuvwxyz'   # lower-case letters
    ' .,-+'                        # space and punctuation
    '1234567890'                   # digits
)

In [3]:
def CTCLoss(y_true, y_pred):
    """Keras-compatible loss wrapper around `ctc_batch_cost`.

    Args:
        y_true: batch of label sequences; axis 0 is the batch, axis 1 the label width.
        y_pred: batch of model outputs; axis 0 is the batch, axis 1 the time steps.

    Returns:
        Whatever `ctc_batch_cost` returns for the batch.
    """
    # Column of ones, one row per sample, used to broadcast the scalar lengths.
    n_samples = tf.cast(tf.shape(y_true)[0], dtype="int64")
    per_sample = tf.ones(shape=(n_samples, 1), dtype="int64")

    # Every sample is given the full time dimension of y_pred as its input length
    # and the full width of y_true as its label length, so any padding entries in
    # y_true are counted as part of the label.
    input_length = tf.cast(tf.shape(y_pred)[1], dtype="int64") * per_sample
    label_length = tf.cast(tf.shape(y_true)[1], dtype="int64") * per_sample

    return ctc_batch_cost(y_true, y_pred, input_length, label_length)

In [4]:
def decode_preds(pred_model,start,end):
    """Predict on a slice of the image tensor and greedily CTC-decode the output.

    Args:
        pred_model: model whose `predict` output is passed to `ctc_decode`
            (assumed to be per-time-step class scores shaped
            (samples, time_steps, classes) -- TODO confirm).
        start, end: slice bounds into the module-level `img_tensor_trsh`.

    Returns:
        (guess_list, prob_list): one decoded string per sample, and the second
        value returned by `ctc_decode` converted to a NumPy array.
    """
    preds = pred_model.predict(img_tensor_trsh[start:end])

    # `input_length` for ctc_decode is the number of time steps of each prediction,
    # not the length of the ground-truth label. Using label lengths here truncated
    # decoding to the first few frames and made inference depend on the labels.
    input_len = np.full(preds.shape[0], preds.shape[1], dtype="int32")
    decoded, scores = ctc_decode(preds, input_len, greedy=True)

    # Entries equal to -1 are filler in the dense decoder output and are skipped.
    # NOTE(review): a decoded index of 0 maps to charlist[-1] ('0'); if class 0 is
    # the padding id this shows up as '0' characters in the text -- confirm intent.
    guess_list = [
        ''.join(charlist[p-1] for p in seq.numpy() if p > -1)
        for seq in decoded[0]
    ]
    prob_list = scores.numpy()
    return guess_list,prob_list

In [5]:
def post_distance(pred_model,start,end):
    """Mean Levenshtein distance between decoded predictions and reference labels.

    Args:
        pred_model: model forwarded to `decode_preds`.
        start, end: slice bounds applied to the module-level `lab_tensor_sh`
            (and, inside `decode_preds`, to the image tensor).

    Returns:
        The average `lev.distance` over the samples actually present in the slice.

    Raises:
        ValueError: if the slice is empty, or if the number of decoded predictions
            differs from the number of labels.
    """
    preds, _ = decode_preds(pred_model,start,end)
    # NOTE(review): a label value of 0 maps to charlist[-1] ('0'); if 0 is the
    # padding id, padded positions appear as '0' characters in the reference text.
    trues = [''.join(charlist[p-1] for p in lab.numpy()) for lab in lab_tensor_sh[start:end]]

    if len(trues) != len(preds):
        raise ValueError(
            "got %d labels but %d predictions for slice [%s:%s]"
            % (len(trues), len(preds), start, end)
        )
    if not trues:
        raise ValueError("no samples in slice [%s:%s]" % (start, end))

    # Average over the samples really returned rather than over `end - start`, so a
    # slice that runs past the end of the data is neither an IndexError nor skewed.
    total = sum(lev.distance(true, pred) for true, pred in zip(trues, preds))
    return total / len(trues)
# The distance could be normalised by the length of the longer string of each pair;
# that is left out for now.

In [22]:
# Input: single-channel 679 x 480 images.
input_img = Input(shape=(679, 480, 1), name='image', dtype="float32")

# Convolutional feature extractor: conv -> pool -> dropout stages.
x = Conv2D(32, (3, 3), activation='relu', name='Conv1')(input_img)
x = MaxPooling2D((2, 2), name="pool1")(x)
x = Dropout(0.3, name='drop1')(x)

x = Conv2D(64, (3, 3), activation='relu', name='Conv2')(x)
x = MaxPooling2D((2, 2), name="pool2")(x)
x = Dropout(0.3, name='drop2')(x)

x = Conv2D(128, (4, 4), activation='relu', name='Conv3')(x)
x = Conv2D(128, (5, 5), activation='relu', name='Conv4')(x)
x = MaxPooling2D((2, 2), name='pool3')(x)
x = Dropout(0.3, name='drop3')(x)

x = Conv2D(128, (5, 5), activation='relu', name='Conv5')(x)

# Flatten the last two axes of the feature map so that axis 1 becomes the sequence
# axis for the recurrent layers. The target shape is derived from the tensor itself
# instead of being hard-coded; for the 679 x 480 input it evaluates to (76, 6528).
shape = (x.shape[1], x.shape[2] * x.shape[3])

x = Reshape(target_shape=shape, name='reshape')(x)
x = Dense(64, activation='relu', name='dense1')(x)
x = Dropout(0.2, name='drop4')(x)

# Stacked bidirectional LSTMs over the sequence axis.
x = Bidirectional(LSTM(256, return_sequences=True, dropout=0.25), name='LSTM1')(x)
x = Bidirectional(LSTM(128, return_sequences=True, dropout=0.25), name='LSTM2')(x)
x = Bidirectional(LSTM(64, return_sequences=True, dropout=0.25), name='LSTM3')(x)

# Per-time-step softmax over the character set plus two extra classes
# (presumably the 0 padding index and the CTC blank -- TODO confirm).
output = Dense(len(charlist)+2, activation='softmax', name='dense2')(x)

model = Model(input_img, output, name='basic-OCRplus')
model.compile(optimizer='adam', loss=CTCLoss)

In [23]:
# Print the layer-by-layer architecture (layer type, output shape, parameter count).
model.summary()

Model: "basic-OCRplus"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 image (InputLayer)          [(None, 679, 480, 1)]     0         
                                                                 
 Conv1 (Conv2D)              (None, 677, 478, 32)      320       
                                                                 
 pool1 (MaxPooling2D)        (None, 338, 239, 32)      0         
                                                                 
 drop1 (Dropout)             (None, 338, 239, 32)      0         
                                                                 
 Conv2 (Conv2D)              (None, 336, 237, 64)      18496     
                                                                 
 pool2 (MaxPooling2D)        (None, 168, 118, 64)      0         
                                                                 
 drop2 (Dropout)             (None, 168, 118, 64)    

In [None]:
# Train for a fixed number of epochs, with 20% of the samples used as validation data.
eps = 100
history = model.fit(
    x=img_tensor_trsh,
    y=lab_tensor_sh,
    validation_split=0.2,
    epochs=eps,
    batch_size=128,
)

In [None]:
# Inference model: from the 'image' input layer to the 'dense2' output layer.
pred_model = Model(
    inputs=model.get_layer(name='image').input,
    outputs=model.get_layer(name='dense2').output,
)

In [None]:
# Decode the predictions for samples 0..1199 into strings, keeping the second
# value returned by decode_preds alongside them.
guesses, probs = decode_preds(pred_model,0,1200)

In [None]:
# Average Levenshtein distance between decoded predictions and labels for the
# slice [0:1407] of the data.
lev_d = post_distance(pred_model,0,1407)