In [None]:
%pip install python-Levenshtein

In [1]:
import pickle
import tensorflow as tf
import numpy as np

import matplotlib.pyplot as plt
from tensorflow.keras.layers import (Input, Conv2D, MaxPooling2D, Reshape, Dense, Bidirectional, Dropout, LSTM 
,StringLookup, ConvLSTM2D)
from tensorflow.keras import Model
from tensorflow.keras.backend import ctc_decode, ctc_batch_cost
from tensorflow.keras.layers import Layer

import Levenshtein as lev

In [None]:
# Mount Google Drive at /content/drive (only works inside Google Colab).
# The image tensor loaded further down is read from ./drive/MyDrive, so this
# presumably relies on the working directory being /content -- TODO confirm.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Load the pickled labels from the working directory.
# NOTE(review): pickle.load can execute arbitrary code -- only load files you trust.
with open('./labeltensor_shpad.pkl','rb') as ry:
    lab_tensor_sh = pickle.load(ry)

# Load the pickled images from the mounted Drive folder.
with open('./drive/MyDrive/imgtensor_trsh.pkl','rb') as arbi:
    img_tensor_trsh = pickle.load(arbi)

# Alphabet used to turn label indices back into text. The helpers below index it
# as charlist[p-1] and skip p == 0 in the true labels, i.e. label indices are
# treated as 1-based with 0 acting as padding.
charlist = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz .,-+1234567890'

def CTCLoss(y_true, y_pred):
    """Mean CTC loss over a batch, usable as a Keras ``loss=`` callable.

    Args:
        y_true: label batch; the number of non-zero entries along the last axis
            is used as each sample's label length (zeros act as padding).
        y_pred: model output batch; axis 1 is taken as the time axis and its
            full size is used as the input length of every sample.

    Returns:
        Scalar tensor: the mean of the per-sample values from ``ctc_batch_cost``.
    """
    # Length handling follows
    # https://stackoverflow.com/questions/64321779/how-to-use-tf-ctc-loss-with-variable-length-features-and-labels
    n_samples = tf.cast(tf.shape(y_true)[0], dtype="int64")
    n_steps = tf.cast(tf.shape(y_pred)[1], dtype="int64")

    # Column vector (batch, 1) holding the same time-step count for every sample.
    input_length = tf.ones(shape=(n_samples, 1), dtype="int64") * n_steps
    # Column vector (batch, 1) with the count of non-zero label entries per sample.
    label_length = tf.math.count_nonzero(y_true, axis=-1, keepdims=True)

    per_sample_loss = ctc_batch_cost(y_true, y_pred, input_length, label_length)
    return tf.reduce_mean(per_sample_loss)

def decode_preds(pred_model,start,end):
    """Predict on ``img_tensor_trsh[start:end]`` and greedy-CTC-decode to text.

    Reads the module-level ``img_tensor_trsh`` and ``charlist``.

    Args:
        pred_model: Keras model whose ``predict`` output has the time axis at
            position 1 (batch, time_steps, classes).
        start: first sample index (slice start) into ``img_tensor_trsh``.
        end: slice end (exclusive) into ``img_tensor_trsh``.

    Returns:
        tuple ``(guess_list, prob_list)``: the decoded strings, one per sample,
        and the second element returned by ``ctc_decode`` as a NumPy array.
    """
    preds = pred_model.predict(img_tensor_trsh[start:end])

    # ctc_decode's input_length is the number of time steps of each prediction,
    # not the length of the target text. Using the ground-truth label length
    # here would cut decoding off after the first few frames and would leak
    # label information into inference.
    len_tensor = tf.convert_to_tensor(np.full(preds.shape[0], preds.shape[1]))
    pred_texts = ctc_decode(preds,len_tensor,greedy=True)

    # ctc_decode pads with -1. Index 0 is the padding label (characters are
    # 1-based), so it must be skipped as well -- otherwise charlist[0-1] wraps
    # around and silently emits the last character of charlist.
    guess_list = [
        ''.join(charlist[p-1] for p in pred.numpy() if 0 < p <= len(charlist))
        for pred in pred_texts[0][0]
    ]
    prob_list = pred_texts[1].numpy()
    return guess_list,prob_list

def post_distance(pred_model,start,end):
    """Mean Levenshtein distance between decoded and true texts for ``start:end``.

    Reads the module-level ``lab_tensor_sh`` and ``charlist`` and calls
    ``decode_preds`` for the predictions.

    Args:
        pred_model: model passed through to ``decode_preds``.
        start: first sample index (slice start).
        end: slice end (exclusive).

    Returns:
        ``np.mean`` of the per-sample ``lev.distance(true, predicted)`` values
        (NaN when the slice is empty).
    """
    preds, _ = decode_preds(pred_model,start,end)
    # True labels are 1-based indices into charlist; 0 is padding and is skipped.
    trues = [''.join(charlist[p-1] for p in lab.numpy() if p>0) for lab in lab_tensor_sh[start:end]]
    # Pair the strings directly instead of indexing with range(end-start): the
    # slices may be shorter than end-start (e.g. end past the dataset size),
    # which used to raise IndexError.
    return np.mean([lev.distance(true, guess) for true, guess in zip(trues, preds)])
# The distance could be normalized by dividing by the length of the longer
# string; left unnormalized for now.

In [None]:
# CNN feature extractor -> reshape to a sequence -> stacked BiLSTMs -> per-step softmax,
# trained with CTC loss.
input_img = Input(shape=(679,480,1),name='image',dtype="float32")


# Conv stage 1 (default 'valid' padding throughout, so each conv shrinks the map).
x = Conv2D(32,(3,3),activation='relu',name='Conv1')(input_img)
x = MaxPooling2D((2,2), name = "pool1")(x)
x = Dropout(0.3,name = 'drop1')(x)


# Conv stage 2.
x = Conv2D(64,(3,3),activation='relu',name='Conv2')(x)
x = MaxPooling2D((2,2), name = "pool2")(x)
x = Dropout(0.3, name = 'drop2')(x)

# Conv stage 3: two stacked convolutions before pooling.
x = Conv2D(128,(4,4), activation= 'relu',name='Conv3')(x)
x = Conv2D(128,(5,5),activation='relu', name = 'Conv4')(x)
x = MaxPooling2D((2,2), name = 'pool3')(x)
x = Dropout(0.3,name = 'drop3')(x)

x = Conv2D(128,(5,5), activation= 'relu', name = 'Conv5')(x)

# Conv5 outputs (76, 51, 128) for a 679x480 input = 496128 values, which is
# regrouped here into 152 time steps of 3264 features (152 * 3264 = 496128).
# This is tied to the input size above; changing either requires updating it.
shape = (152,3264)

x = Reshape(target_shape=shape, name = 'reshape')(x)
x = Dense(64, activation='relu', name ='dense1')(x)
x = Dropout(0.2,name = 'drop4')(x)

# Sequence model: four bidirectional LSTMs of decreasing width.
x = Bidirectional(LSTM(256,return_sequences=True, dropout=0.25), name = 'LSTM1')(x)
x = Bidirectional(LSTM(128,return_sequences=True,dropout=0.25), name = 'LSTM2')(x)
x = Bidirectional(LSTM(64,return_sequences=True,dropout=0.25), name = 'LSTM3')(x)
x = Bidirectional(LSTM(32,return_sequences=True,dropout=0.25), name = 'LSTM4')(x)


# Class layout: 0 = padding, 1..len(charlist) = characters (the decode helpers
# use charlist[p-1]), and Keras' CTC ops reserve the LAST class for the blank.
# That needs len(charlist)+2 units; with only +1 the last character's label
# would coincide with the blank index.
output = Dense(len(charlist)+2,activation='softmax',name='dense2')(x)


model = Model(input_img,output,name='basic-OCRplusvartr')
model.compile(optimizer='adam',loss=CTCLoss) #try different optimizers, lstm rows, etc

In [None]:
# Train with a 20% validation split; stop once val_loss has not improved for
# 10 epochs and roll back to the best weights seen.
eps = 100
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor="val_loss", patience=10, restore_best_weights=True
)
# Fit the model defined above (`model`); `model2` is not defined anywhere in
# this notebook and raised NameError on a fresh kernel.
history = model.fit(img_tensor_trsh,lab_tensor_sh,validation_split=0.2, epochs=eps,callbacks=[early_stopping])

In [None]:
# Inference model from the 'image' input to the 'dense2' softmax output.
# Bound to `prediction_model` because that is the name the following cells use.
prediction_model = Model(model.get_layer(name="image").input, model.get_layer(name='dense2').output)
preds = prediction_model  # original name, kept as an alias for backward compatibility

In [None]:
# Decode samples 0..1199: predicted strings plus the scores returned by ctc_decode.
guesses, probs = decode_preds(pred_model=prediction_model, start=0, end=1200)

In [None]:
# Mean Levenshtein distance between predicted and true strings for samples 0..1406.
post_distance(pred_model=prediction_model, start=0, end=1407)