In [None]:
# Importing the Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import os
import string
import pandas as pd


# In[2]:


# Deep Learning Imports
import tensorflow as tf
from tensorflow.keras import backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, add,MaxPooling2D, concatenate,Reshape, Bidirectional, LSTM,GRU, Dense, Lambda, Activation, BatchNormalization, Dropout,Concatenate
from tensorflow.keras.optimizers import Adam,SGD,Adagrad


In [None]:
# from tensorflow.keras import mixed_precision


# policy = mixed_precision.experimental.Policy('mixed_float16')
# mixed_precision.set_global_policy(policy)

# os.environ['TF_ENABLE_AUTO_MIXED_PRECISION'] = '1'
# In[3]:
# Sequence length shared by the data generator and the CTC model definition.
timestamps=64
# Recognised alphabet; a character's position in this string is its class id.
char=string.ascii_uppercase+"./-0123456789"

# char=string.ascii_lowercase
test_dir="../input/20k-synthetic-ocr-dataset/files/20k test"
train_dir="../input/20k-synthetic-ocr-dataset/files/20k train"
# Preparing the Data


### check train and test set distribution

for d,c in zip([test_dir, train_dir],["../input/20k-synthetic-ocr-dataset/test.csv","../input/20k-synthetic-ocr-dataset/train.csv"]):

    # Derive "test"/"train" from the CSV file name. Splitting the full path on
    # "." returned an empty string because the path starts with "../".
    split_name=os.path.splitext(os.path.basename(c))[0]
    print(split_name+" set distribution")

    data=pd.read_csv(c)
    # Compare the number of image files with the number of labelled rows.
    print("%d image files, %d labelled rows" % (len(os.listdir(d)), len(data)))

    # Per-character frequency, pre-seeded so unseen characters show up as 0.
    track=dict.fromkeys(char,0)
    # Characters found in the labels that are not part of the alphabet.
    unknown={}
    for label in data["label"]:
        for l in label:
            if l in track:
                track[l]+=1
            else:
                unknown[l]=unknown.get(l,0)+1
    print(track)
    if unknown:
        print("characters outside the alphabet:",unknown)
    print("===="*20)


In [None]:
def num2label(num, alphabet=None):
 """Convert a sequence of class ids back into a string.

 Args:
     num: iterable of integer class ids. Decoding stops at the first -1,
         which is the value used for padding in the label arrays.
     alphabet: string (or sequence) mapping class id -> character. When
         omitted, the module-level ``alphabets`` is used, as before.

 Returns:
     The decoded string (empty if ``num`` is empty or starts with -1).
 """
 if alphabet is None:
     alphabet = alphabets
 chars = []
 for ch in num:
     if ch == -1:  # CTC Blank
         break
     chars.append(alphabet[ch])
 # Join once instead of growing a string inside the loop.
 return "".join(chars)



# In[4]:


# Custom Data Generator
class DataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` producing OCR image batches together with CTC inputs.

    Every batch has the form ``([X, y, output_len, label_len], output)``:

    * ``X``          -- images reshaped to ``(batch, 256, 64, 1)``, scaled to [0, 1]
    * ``y``          -- ``(batch, max_str_len)`` class ids, padded with -1
    * ``output_len`` -- ``(batch, 1)``, constant ``timestamps - 2``
    * ``label_len``  -- ``(batch, 1)``, true length of every label
    * ``output``     -- ``(batch,)`` zeros used as a dummy training target

    Image file names and labels are read from the ``"image name"`` and
    ``"label"`` columns of the CSV given by ``data``.
    """

    def __init__(self,batch_size=256,image_size=16,max_str_len=20,channels=3,path_to_img_dir=train_dir,shuffle=True,timestamps=timestamps,data="../input/20k-synthetic-ocr-dataset/train.csv"):
        """Read the CSV index and store the batching configuration.

        ``image_size`` and ``channels`` are stored but not used by any method
        of this class.
        """
        self.info_csv=pd.read_csv(data)
        self.batch_size = batch_size
        self.image_size=image_size
        self.channels=channels
        self.shuffle = shuffle
#         self.on_epoch_end()
        self.alphabets=char
        self.max_str_len=max_str_len
        self.num_characters=len(self.alphabets)+1
        self.num_timestamps=timestamps
        self.path=path_to_img_dir
        # Indices address rows of the CSV (images_path / label), so the number
        # of examples must come from the CSV and not from the directory listing.
        self.num_examples=len(self.info_csv)
        self.indices =list(range(self.num_examples))
        self.images_path=[os.path.join(self.path,i) for i in self.info_csv["image name"]]
        self.label=self.info_csv["label"]
        self.n = 0
        self.max = self.__len__()

    def __len__(self):
        """Number of full batches per epoch (a trailing partial batch is dropped)."""
        return len(self.indices) // self.batch_size

    def __getitem__(self, index):
        """Return batch number ``index`` as ``([X, y, output_len, label_len], output)``."""
        inds = self.indices[index * self.batch_size:(index + 1) * self.batch_size]

        X,y,label_len,output_len,output = self.__get_data(inds)

        return [X,y,output_len,label_len],output

    def label_to_num(self,label):
        """Map every character of ``label`` to its position in the alphabet.

        Raises:
            ValueError: if a character is not in the alphabet. ``str.find``
                would return -1 for it, which is also the padding value.
        """
        label_num = []
        for ch in label:
            position = self.alphabets.find(ch)
            if position < 0:
                raise ValueError("character %r in label %r is not in the alphabet" % (ch, label))
            label_num.append(position)

        return np.array(label_num)


    def on_epoch_end(self):
        """Reshuffle the sample order between epochs when ``shuffle`` is set.

        The order is shuffled in ``self.indices`` because that is the list
        ``__getitem__`` slices; ``self.index`` mirrors it as an array.
        """
        if self.shuffle:
            np.random.shuffle(self.indices)
        self.index = np.asarray(self.indices)

    def create_labels(self,raw_labels,batch):
        """Build the CTC arrays for one batch of string labels.

        Args:
            raw_labels: the string labels of the batch.
            batch: the batch indices; only its length is used.

        Returns:
            ``(y, label_len, output_len, output)`` as described on the class.
        """
        num_examples=len(batch)
        # -1 marks unused positions to the right of each label.
        y = np.full([num_examples,self.max_str_len], -1.0)
        label_len = np.zeros([num_examples, 1])
        # Two time steps fewer than the network emits.
        output_len = np.full([num_examples,1], float(self.num_timestamps-2))
        output = np.zeros([num_examples])

        for i, raw_label in enumerate(raw_labels[:num_examples]):
            label_len[i] = len(raw_label)
            y[i, 0:len(raw_label)]= self.label_to_num(raw_label)

        return y,label_len,output_len,output

    def preprocess(self,imgPath):
        """Load one image as grayscale, resize, rotate and scale it to [0, 1].

        Returns:
            float array of shape ``(256, 64)``.

        Raises:
            FileNotFoundError: if OpenCV cannot read ``imgPath``.
        """
        img=cv2.imread(imgPath,cv2.IMREAD_GRAYSCALE)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError("could not read image: %s" % imgPath)
        # dsize is (width, height): the result is always 64 rows x 256 columns,
        # so no further cropping or padding is required.
        img=cv2.resize(img,(256,64))

        final_img = img.astype(np.float64)
        train_x=cv2.rotate(final_img, cv2.ROTATE_90_CLOCKWISE)/255.

        return train_x

    def preprocess_data(self,batch):
        """Load the images and raw labels for the given CSV row indices.

        Returns:
            ``(images, labels)`` where ``images`` has shape
            ``(len(batch), 256, 64, 1)`` and ``labels`` is an array of strings.
        """
        images_array=[self.preprocess(self.images_path[i]) for i in batch]
        labels=[self.label[i] for i in batch]

        train_x=np.stack(images_array)

        return train_x.reshape(-1, 256, 64, 1) ,np.array(labels)

    def __get_data(self, batch):
        """Assemble images and CTC arrays for the given CSV row indices."""
        input_images,input_labels=self.preprocess_data(batch)

        input_y,input_label_len,input_output_len,input_output=self.create_labels(input_labels,batch)

        return input_images, input_y,input_label_len,input_output_len,input_output

    def __next__(self):
        """Return the next batch, wrapping around after the last full batch."""
        if self.n >= self.max:
           self.n = 0
        result= self.__getitem__(self.n)
        self.n += 1
        return result


# In[5]:

In [None]:
# Training batches: generator defaults (train image directory + train.csv).
train_dg = DataGenerator()

# Validation batches: test image directory paired with its own CSV.
val_dg = DataGenerator(
    data="../input/20k-synthetic-ocr-dataset/test.csv",
    path_to_img_dir=test_dir,
)

# Pull one validation batch: x is the list of model inputs, y the dummy target.
x, y = next(val_dg)

In [None]:

# Preparing Labels for CTC Loss
# File names in the training directory (not referenced again in the visible notebook).
images=os.listdir(train_dir)

# Alphabet used by num2label to turn class ids back into characters.
alphabets = char

# Width of the padded label input fed to the CTC loss.
max_str_len=20

# One output class per alphabet character plus one extra class
# (presumably the CTC blank -- confirm against the Keras CTC documentation).
num_of_characters=len(alphabets)+1

num_of_timestamps=timestamps


## verify data

# for i in range(10):
#     plt.imshow(x[0][i])
#     word=num2label(x[1][i].astype("int"))
#     print(word)
#     plt.show()


# Recognition network. Layers are created in a fixed order; the unnamed
# BatchNormalization/Activation/Dropout layers get auto-generated names, so
# reordering statements would change the layer names stored in weight files.
input_data = Input(shape=(256, 64, 1), name='input')

# Block 1: 32 filters, 2x2 pooling -> (128, 32, 32)
inner = Conv2D(32, (3, 3), padding='same', name='conv1', kernel_initializer='he_normal')(input_data)  
inner = BatchNormalization()(inner)
inner = Activation('relu')(inner)
inner = MaxPooling2D(pool_size=(2, 2), name='max1')(inner)

# Block 2: two 64-filter convolutions, 2x2 pooling -> (64, 16, 64)
inner = Conv2D(64, (3, 3), padding='same', name='conv2', kernel_initializer='he_normal')(inner)
inner = Conv2D(64, (3, 3), padding='same', name='conv2_1', kernel_initializer='he_normal')(inner)
inner = BatchNormalization()(inner)
inner = Activation('relu')(inner)
inner = MaxPooling2D(pool_size=(2, 2), name='max2')(inner)
inner = Dropout(0.5)(inner)

# Block 3: two 128-filter convolutions; pooling (1, 2) keeps the first axis
# at 64 steps and halves the second -> (64, 8, 128)
inner = Conv2D(128, (3, 3), padding='same', name='conv3', kernel_initializer='he_normal')(inner)
inner = Conv2D(128, (3, 3), padding='same', name='conv3_1', kernel_initializer='he_normal')(inner)
inner = BatchNormalization()(inner)
inner = Activation('relu')(inner)
inner = MaxPooling2D(pool_size=(1, 2), name='max3')(inner)
inner = Dropout(0.7)(inner)

# CNN to RNN
# Flatten the last two axes (8 * 128 = 1024) into one feature vector per step,
# giving a sequence of 64 steps.
inner = Reshape(target_shape=((64, 1024)), name='reshape')(inner)
inner = Dense(64, activation='relu', kernel_initializer='he_normal', name='dense1')(inner)

## RNN
# Note: the layers are named 'lstm1'/'lstm2' but wrap GRU cells.
inner = Bidirectional(GRU(256, return_sequences=True), name = 'lstm1')(inner)
inner = Bidirectional(GRU(256, return_sequences=True), name = 'lstm2')(inner)

## OUTPUT
# Per-step class scores followed by a softmax over num_of_characters classes.
inner = Dense(num_of_characters, kernel_initializer='he_normal',name='dense2')(inner)
y_pred = Activation('softmax', name='softmax')(inner)

# Inference model: image in, per-step class probabilities out.
model = Model(inputs=input_data, outputs=y_pred)
# model.summary()





def ctc_lambda_func(args):
    """Compute the batch CTC cost inside a Keras ``Lambda`` layer.

    ``args`` is the 4-item sequence
    ``(y_pred, labels, input_length, label_length)``.
    """
    predictions, truth, prediction_lengths, truth_lengths = args
    # Dropping the first 2 time steps is critical: the first couple of RNN
    # outputs tend to be garbage.
    trimmed = predictions[:, 2:, :]
    return K.ctc_batch_cost(truth, trimmed, prediction_lengths, truth_lengths)


In [None]:

# Additional inputs that are consumed only by the CTC loss layer.
labels = Input(shape=[max_str_len], dtype='float32', name='gtruth_labels')
input_length = Input(shape=[1], dtype='int64', name='input_length')
label_length = Input(shape=[1], dtype='int64', name='label_length')

# The loss is computed inside the graph by a Lambda layer named 'ctc'.
ctc_inputs = [y_pred, labels, input_length, label_length]
ctc_loss = Lambda(ctc_lambda_func, name='ctc', output_shape=(1,))(ctc_inputs)

# Training model: its single output is the CTC loss value itself.
model_final = Model(
    outputs=ctc_loss,
    inputs=[input_data, labels, input_length, label_length],
)


In [None]:
# Print the layer-by-layer summary of the training model (recognition network + CTC loss layer).
model_final.summary()

In [None]:
## Defining Model Callbacks
# Keep only the weights of the best epoch seen so far.
save_best=tf.keras.callbacks.ModelCheckpoint(filepath="GRU_uppercase.h5",save_best_only=True,save_weights_only=True,verbose=True)
# Multiply the learning rate by 0.3 after 10 epochs without improvement.
# The lower bound is passed as `min_lr`; `min_lr_rate` is not an argument of
# ReduceLROnPlateau.
reduce_lr=tf.keras.callbacks.ReduceLROnPlateau(min_lr=0.00000000000001,factor=0.3,patience=10,verbose=True)


In [None]:

## Defining Model Callbacks
# NOTE(review): this cell repeats the callback definitions of the previous cell.
# Keep only the weights of the best epoch seen so far.
save_best=tf.keras.callbacks.ModelCheckpoint(filepath="GRU_uppercase.h5",save_best_only=True,save_weights_only=True,verbose=True)
# Multiply the learning rate by 0.3 after 10 epochs without improvement.
# The lower bound is passed as `min_lr`; `min_lr_rate` is not an argument of
# ReduceLROnPlateau.
reduce_lr=tf.keras.callbacks.ReduceLROnPlateau(min_lr=0.00000000000001,factor=0.3,patience=10,verbose=True)

In [None]:
# model.load_weights("Lstm_uppercase.h5")
## In[ ]:

# opt=tf.keras.optimizers.Adadelta(learning_rate=0.001, rho=0.95, epsilon=1e-07, name="Adadelta")
## the loss calculation occurs elsewhere, so we use a dummy lambda function for the loss

# The 'ctc' layer already outputs the loss value, so the compiled loss simply
# passes the model output through. `learning_rate` replaces the deprecated
# `lr` alias of the Adam optimizer.
model_final.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=Adam(learning_rate=0.0001))

# NOTE(review): the `workers` argument of fit() belongs to the TF2-era Keras
# API -- confirm it is still accepted by the installed version.
model_final.fit(train_dg, 
                validation_data=val_dg,
                epochs=200,callbacks=[save_best,reduce_lr],workers=8)




In [None]:
# Save the inference model (image -> per-step probabilities) in HDF5 format.
# NOTE(review): `model` shares its layers with `model_final`, so this stores the
# weights as they are after the last epoch, not the best ones that the
# ModelCheckpoint callback wrote to "GRU_uppercase.h5" -- confirm this is intended.
model.save('20k_synthetic_ocr_dataset.h5')

In [None]:

# Load the whole test set as a single batch.
batch_size=len(os.listdir("../input/20k-synthetic-ocr-dataset/files/20k test"))
test_dg=DataGenerator(path_to_img_dir=test_dir,data="../input/20k-synthetic-ocr-dataset/test.csv",batch_size=batch_size)

X,y=next(test_dg)
print(X[0].shape)
# Checking the Performance of the Model on Testing Set
preds = model.predict(X[0])

print(preds.shape)

# The CTC loss was trained on y_pred[:, 2:, :] (see ctc_lambda_func), so the
# first two time steps were never supervised. Decode the same slice;
# otherwise those steps can inject spurious leading characters.
ctc_skip = 2
preds_ctc = preds[:, ctc_skip:, :]
decoded = K.get_value(K.ctc_decode(preds_ctc, input_length=np.ones(preds_ctc.shape[0])*preds_ctc.shape[1], 
                                   greedy=True)[0][0])

# Predicted and ground-truth strings, one per test image.
prediction = [num2label(i) for i in decoded]
gt = [num2label(i) for i in X[1].astype("int")]


# In[47]:

correct_char = 0
total_char = 0
correct = 0

# Indices (into X[0]) of the images whose prediction differs from the label.
wrong_preds=[]
num_samples = X[0].shape[0]
for i in range(num_samples):
    pr = prediction[i]
    tr = gt[i]
    total_char += len(tr)

    # Position-wise character matches over the common prefix length.
    correct_char += sum(1 for t, p in zip(tr, pr) if t == p)

    if pr == tr:
        correct += 1
    else:
        wrong_preds.append(i)

# max(..., 1) avoids a ZeroDivisionError on an empty test set.
print('Correct characters predicted : %.2f%%' %(correct_char*100/max(total_char, 1)))
print('Correct words predicted      : %.2f%%' %(correct*100/max(num_samples, 1)))

##
### In[48]:
##
##

def preprocess(imgPath):
    """Load one image as grayscale, resize, rotate and scale it to [0, 1].

    Args:
        imgPath: path of the image file.

    Returns:
        float array of shape ``(256, 64)``.

    Raises:
        FileNotFoundError: if OpenCV cannot read ``imgPath``.
    """
    img=cv2.imread(imgPath,cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("could not read image: %s" % imgPath)
    # dsize is (width, height): the result is always 64 rows x 256 columns,
    # so no further cropping or padding is required.
    img=cv2.resize(img,(256,64))

    final_img = img.astype(np.float64)
    train_x=cv2.rotate(final_img, cv2.ROTATE_90_CLOCKWISE)/255.

    return train_x

real_images_dir='../input/20k-synthetic-ocr-dataset/files/real_images'
test_images=np.vstack([preprocess(os.path.join(real_images_dir,i)) for i in os.listdir(real_images_dir)])

test_array=np.array(test_images).reshape(-1, 256, 64, 1)


def _predict_label(image):
    """Run the model on a single image and greedy-decode the result.

    The first two time steps are dropped before decoding because the CTC loss
    was trained on ``y_pred[:, 2:, :]`` (see ``ctc_lambda_func``).

    Returns:
        ``(decoded, text)``: the raw decoded id array and the decoded string.
    """
    pred = model.predict(np.expand_dims(image,axis=0))[:, 2:, :]
    decoded = K.get_value(K.ctc_decode(pred, input_length=np.ones(pred.shape[0])*pred.shape[1], 
                                       greedy=True)[0][0])
    return decoded, num2label(decoded[0])


# --- Misclassified test images ------------------------------------------
if len(wrong_preds) == 0:
    # np.random.choice raises on an empty sequence.
    print("No wrong predictions to display")
else:
    plt.figure(figsize=(15, 10))

    for i in range(10):
        ax = plt.subplot(2, 5, i+1)
        rand_int=np.random.choice(wrong_preds)
        image=X[0][rand_int]
        # Separate name so the list `gt` of ground-truth strings is not overwritten.
        true_ids=X[1][rand_int]
        plt.imshow(np.rot90(image))

        decoded, predicted_text = _predict_label(image)
        print(decoded)

        print(num2label(true_ids.astype("int")),"predicted label is --->",predicted_text)
        plt.title(predicted_text, fontsize=14)
        plt.axis('off')

    plt.subplots_adjust(wspace=0.2, hspace=0.8)

    plt.show()


# --- Random real-world images (no ground truth available) ---------------
plt.figure(figsize=(20, 6))

for i in range(20):
    ax = plt.subplot(2, 10, i+1)
    rand_int=np.random.randint(0,len(test_array))
    image=test_array[rand_int]
    plt.imshow(np.rot90(image))

    decoded, predicted_text = _predict_label(image)
    print(decoded)

    print("predicted label is --->",predicted_text)
    plt.title(predicted_text, fontsize=14)
    plt.axis('off')

plt.subplots_adjust(wspace=0.2, hspace=0.8)

plt.show()

The model predicts the labels very well, apart from the first character of the predictions.

# References

Thanks to the author for sharing this data and its code.