In [1]:
import cv2
import numpy as np

In [2]:
# Load the EAST text-detection network from its .pb file.
# Nothing is downloaded here: the file is read from the current working
# directory, so it has to be present before this cell is run.
model = cv2.dnn.readNet('frozen_east_text_detection.pb')

In [3]:
# ## Prepare the image
img = cv2.imread('bank_card.png')

# cv2.imread reports a missing/unreadable file by returning None rather than
# raising, so fail here with a clear message instead of on `img.shape` below.
if img is None:
    raise FileNotFoundError("Could not read image 'bank_card.png'")

# use multiple of 32 to set the new img shape
height, width = img.shape[:2]
print(height, width)

# Round each side down to a multiple of 32, but never below 32: a side of 0
# would make the ratio computation divide by zero and give an empty blob.
new_height = max(32, (height // 32) * 32)
new_width = max(32, (width // 32) * 32)
print(new_height, new_width)

688 1102
672 1088


In [4]:
# Scale factors (original size / network-input size) used later to map
# detected box coordinates back onto the original image.
h_ratio, w_ratio = height / new_height, width / new_width
print(h_ratio, w_ratio)

1.0238095238095237 1.0128676470588236


In [5]:
# Build the network input: resize to the 32-aligned size, subtract the
# per-channel mean, and swap BGR -> RGB (no centre crop).
blob = cv2.dnn.blobFromImage(
    img,
    scalefactor=1,
    size=(new_width, new_height),
    mean=(123.68, 116.78, 103.94),
    swapRB=True,
    crop=False,
)

In [6]:
# ## Pass the image to network and extract score and geometry map
model.setInput(blob)

# Request the two EAST outputs by name. Unpacking the result of
# getUnconnectedOutLayersNames() depends on the order OpenCV happens to report
# the layers in; naming them pins which array is which.
east_output_layers = [
    'feature_fusion/Conv_7/Sigmoid',  # text / no-text score map
    'feature_fusion/concat_3',        # box geometry map
]
(scores, geometry) = model.forward(east_output_layers)

In [7]:
# ## Post-Processing

MIN_CONFIDENCE = 0.1  # cells scoring below this are not treated as text
EAST_STRIDE = 4       # multiplier from an output-map cell index to input pixels


def decode_east_boxes(scores, geometry, min_confidence=MIN_CONFIDENCE, stride=EAST_STRIDE):
    """Turn the EAST score/geometry maps into axis-aligned candidate boxes.

    Args:
        scores: array indexed as ``scores[0][0][row][col]`` holding the text
            confidence of each output-map cell.
        geometry: array indexed as ``geometry[0][k][row][col]``; channels 0-3
            are read as the distances from the cell to the top, right, bottom
            and left box edges. Any further channel (e.g. a rotation angle)
            is ignored, so the boxes are always axis-aligned.
        min_confidence: cells with a score below this value are skipped.
        stride: factor converting a cell index into network-input pixels.

    Returns:
        ``(rectangles, confidences)`` where ``rectangles`` is a list of
        ``(top_x, top_y, bottom_x, bottom_y)`` int tuples in network-input
        coordinates and ``confidences`` the matching list of float scores,
        both in row-major cell order.
    """
    score_map = scores[0][0]
    dist_top, dist_right, dist_bottom, dist_left = (geometry[0][k] for k in range(4))

    boxes = []
    confidences = []

    # argwhere walks the map row by row, i.e. in the same order as a nested
    # row/column loop, but only yields the cells that pass the threshold.
    for i, j in np.argwhere(score_map >= min_confidence).tolist():
        bottom_x = int(j * stride + dist_right[i][j])
        bottom_y = int(i * stride + dist_bottom[i][j])

        top_x = int(j * stride - dist_left[i][j])
        top_y = int(i * stride - dist_top[i][j])

        boxes.append((top_x, top_y, bottom_x, bottom_y))
        confidences.append(float(score_map[i][j]))

    return boxes, confidences


rectangles, confidence_score = decode_east_boxes(scores, geometry)

In [8]:
from imutils.object_detection import non_max_suppression

In [9]:
# Non-max suppression: collapse heavily overlapping candidates (overlap > 0.5)
# so that each text region is represented by a single box.
candidate_boxes = np.array(rectangles)
fin_boxes = non_max_suppression(
    candidate_boxes,
    probs=confidence_score,
    overlapThresh=0.5,
)

In [10]:
# Draw on a copy so the original `img` stays clean for later cropping.
img_copy = img.copy()

# Boxes are in network-input coordinates; scale them back to the original
# image size before drawing.
for (x1, y1, x2, y2) in fin_boxes:
    top_left = (int(x1 * w_ratio), int(y1 * h_ratio))
    bottom_right = (int(x2 * w_ratio), int(y2 * h_ratio))

    cv2.rectangle(img_copy, top_left, bottom_right, (0, 255, 0), 1)

cv2.imshow("Text Detection", img_copy)
cv2.waitKey(0)
# Close the HighGUI window once a key is pressed; otherwise it lingers as an
# unresponsive window when the cell is run from a notebook kernel.
cv2.destroyAllWindows()

-1

In [11]:
# Convert the surviving boxes from network-input coordinates to original-image
# pixels. NOTE: this rebinds `fin_boxes` from itself, so running the cell a
# second time would apply the scaling twice.
fin_boxes = [
    [
        int(left * w_ratio),
        int(top * h_ratio),
        int(right * w_ratio),
        int(bottom * h_ratio),
    ]
    for (left, top, right, bottom) in fin_boxes
]

print(fin_boxes)

[[907, 463, 1042, 488], [678, 460, 749, 479], [97, 567, 377, 605], [924, 557, 1022, 593], [99, 485, 211, 528], [738, 460, 791, 479], [465, 396, 608, 443], [282, 396, 423, 442], [679, 484, 801, 527], [92, 395, 240, 443], [776, 72, 1004, 134], [652, 396, 794, 441], [446, 71, 749, 133], [593, 487, 651, 514], [596, 500, 655, 528]]


In [12]:
def crop_images(image, fin_boxes):
    """Cut one sub-image out of `image` for every bounding box.

    Args:
        image: array indexed as ``image[row, col, ...]`` to crop from. Pass the
            un-annotated image if drawn boxes must not appear in the crops.
        fin_boxes: iterable of boxes whose first four items are
            ``x1, y1, x2, y2`` (left, top, right, bottom) in pixels.

    Returns:
        A list with one crop per box, in the same order as `fin_boxes`. Crops
        are NumPy slices of `image`; a degenerate box yields an empty array
        rather than being dropped, so indices stay aligned with `fin_boxes`.
    """
    crop_imgs = []

    for boundingbox in fin_boxes:
        x1, y1, x2, y2 = (int(v) for v in boundingbox[:4])

        # Negative indices would wrap around in NumPy slicing and select the
        # wrong region, so clamp to the image origin. Values past the far
        # edge are already clipped by slicing itself.
        x1, y1, x2, y2 = max(0, x1), max(0, y1), max(0, x2), max(0, y2)

        # Crop from the `image` argument (previously the global `img` was
        # used and the parameter was silently ignored).
        crop_imgs.append(image[y1:y2, x1:x2])

    return crop_imgs

In [13]:
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
import tensorflow.keras.backend as K

from tensorflow.keras.layers import Dense, BatchNormalization, Input, Conv2D, MaxPool2D, Lambda, Bidirectional, LSTM
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

In [14]:
# Recognition alphabet: digits, then upper-case, then lower-case letters.
# The position of a character in this list is its class index.
char_list = list(
    '0123456789'
    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    'abcdefghijklmnopqrstuvwxyz'
)

In [15]:
class CTCLayer(layers.Layer):
    """Pass-through layer that attaches the CTC loss to the model.

    `call` computes ``ctc_batch_cost`` between the labels and the predictions,
    registers it with ``add_loss`` and returns the predictions unchanged.
    """

    def __init__(self, name=None, **kwargs):
        # Forward the standard Layer keyword arguments (trainable, dtype, ...)
        # so the layer can be rebuilt from its config; accepting only `name`
        # makes from_config fail on those extra keys.
        super().__init__(name=name, **kwargs)
        self.loss_fn = keras.backend.ctc_batch_cost

    def call(self, y_true, y_pred):
        """Add the CTC loss for this batch and return `y_pred` as-is.

        Args:
            y_true: label tensor; axis 0 is read as the batch size and
                axis 1 as the label length.
            y_pred: prediction tensor; axis 1 is read as the number of
                time steps.

        Returns:
            `y_pred`, unchanged.
        """
        # Compute the training-time loss value and add it
        # to the layer using `self.add_loss()`.

        batch_len = tf.cast(tf.shape(y_true)[0], dtype="int64")
        input_length = tf.cast(tf.shape(y_pred)[1], dtype="int64")
        label_length = tf.cast(tf.shape(y_true)[1], dtype="int64")

        # ctc_batch_cost wants one length per sample, shaped (batch, 1).
        # Every sample is given the full time-step count and the full
        # (padded) label width.
        input_length = input_length * tf.ones(shape=(batch_len, 1), dtype="int64")
        label_length = label_length * tf.ones(shape=(batch_len, 1), dtype="int64")

        loss = self.loss_fn(y_true, y_pred, input_length, label_length)
        self.add_loss(loss)

        # At test time, just return the computed predictions
        return y_pred

In [16]:
def create_crnn():
    """Build the training-time CRNN: conv stack -> 2 x BiLSTM -> softmax -> CTC loss.

    The model has two inputs, "image" of shape (32, 128, 1) and "label" of
    variable length, and its output is the per-time-step softmax passed
    through CTCLayer (which adds the loss). The softmax layer is named
    "dense" and has len(char_list) + 1 classes.

    Layer order and sizes must stay as they are for previously saved
    weights to load.

    Returns:
        An uncompiled keras Model taking [image, label].
    """
    
    # input with shape of height=32 and width=128 
    inputs = Input(shape=(32, 128, 1), name="image")

    # ground-truth label sequence; only consumed by the CTC loss layer
    labels = layers.Input(name="label", shape=(None,), dtype="float32")

    # (32, 128, 1) -> (16, 64, 32)
    conv_1 = Conv2D(32, (3,3), activation = "selu", padding='same')(inputs)
    pool_1 = MaxPool2D(pool_size=(2, 2))(conv_1)
    
    # (16, 64, 32) -> (8, 32, 64)
    conv_2 = Conv2D(64, (3,3), activation = "selu", padding='same')(pool_1)
    pool_2 = MaxPool2D(pool_size=(2, 2))(conv_2)

    conv_3 = Conv2D(128, (3,3), activation = "selu", padding='same')(pool_2)
    conv_4 = Conv2D(128, (3,3), activation = "selu", padding='same')(conv_3)

    # From here on pooling is (2, 1): height is halved, width (the future
    # time axis) is kept.  (8, 32, 128) -> (4, 32, 128)
    pool_4 = MaxPool2D(pool_size=(2, 1))(conv_4)
    
    conv_5 = Conv2D(256, (3,3), activation = "selu", padding='same')(pool_4)
    
    # Batch normalization layer
    batch_norm_5 = BatchNormalization()(conv_5)
    
    conv_6 = Conv2D(256, (3,3), activation = "selu", padding='same')(batch_norm_5)
    batch_norm_6 = BatchNormalization()(conv_6)
    # (4, 32, 256) -> (2, 32, 256)
    pool_6 = MaxPool2D(pool_size=(2, 1))(batch_norm_6)
    
    # 2x2 convolution without padding collapses the height to 1:
    # (2, 32, 256) -> (1, 31, 64)
    conv_7 = Conv2D(64, (2,2), activation = "selu")(pool_6)
    
    # drop the height axis (size 1) to get a sequence of 31 steps x 64 features
    squeezed = Lambda(lambda x: K.squeeze(x, 1))(conv_7)
    
    # bidirectional LSTM layers with units=128
    blstm_1 = Bidirectional(LSTM(128, return_sequences=True))(squeezed)
    blstm_2 = Bidirectional(LSTM(128, return_sequences=True))(blstm_1)

    # one class per character plus one extra class (index len(char_list))
    softmax_output = Dense(len(char_list) + 1, activation = 'softmax', name="dense")(blstm_2)

    output = CTCLayer(name="ctc_loss")(labels, softmax_output) #y_true = labels, y_pred = softmax_output

    #model to be used at training time
    model = Model(inputs=[inputs, labels], outputs=output)
    
    return model

In [17]:
# NOTE: from this cell on `model` is the CRNN recogniser; the name was
# previously bound to the EAST detector returned by cv2.dnn.readNet.
model = create_crnn()

# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and rejected by newer Keras releases.
optimizer = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, clipnorm=1.0)
# No `loss` argument: the CTC loss is added inside the graph by CTCLayer.
model.compile(optimizer=optimizer)

In [18]:
# Restore the trained CRNN weights from the HDF5 file in the working
# directory; the architecture built by create_crnn() has to match the one
# the weights were saved from.
model.load_weights('C_LSTM_best.hdf5')

In [19]:
# Build the inference model: it shares the trained layers but stops at the
# softmax ("dense") layer, so no label input and no CTC loss layer are needed.
prediction_model = keras.models.Model(
    model.input[0], model.get_layer(name="dense").output  # model.input[0] is the "image" input, i.e. model.get_layer(name="image").input
)

In [20]:
from itertools import groupby

def ctc_decoder(predictions, chars=None):
    '''
    Greedy (best-path) CTC decoding of a batch of predictions.

    input:
        predictions: array-like of shape (batch, time_steps, num_classes)
            holding per-time-step class scores from the text rec model
        chars: optional sequence mapping class index -> character; defaults
            to the module-level char_list. Index len(chars) is treated as
            the CTC blank class.
    output: list with one decoded string per batch item

    '''
    if chars is None:
        chars = char_list

    # The network emits len(chars) + 1 classes; the extra, last index is the
    # blank and never maps to a character.
    blank_index = len(chars)

    # most likely class at every time step, shape (batch, time_steps)
    pred_indices = np.argmax(predictions, axis=2)

    text_list = []
    for sequence in pred_indices:
        ## merge repeats (must happen before blanks are dropped, so that a
        ## blank between two equal characters keeps both of them)
        merged = (label for label, _ in groupby(sequence))

        ## remove blanks and map the remaining indices to characters
        text_list.append(
            "".join(chars[int(label)] for label in merged if label != blank_index)
        )

    return text_list

In [22]:
final_texts = []

# Crop from the untouched `img` rather than `img_copy`: the copy carries the
# green detection rectangles, which must not be fed to the recogniser.
for crop_img in crop_images(img, fin_boxes):

    # A degenerate box gives an empty crop, which cv2.resize cannot handle.
    # Store a placeholder so final_texts stays index-aligned with fin_boxes.
    if crop_img.size == 0:
        final_texts.append([''])
        continue

    test_image = cv2.cvtColor(crop_img, cv2.COLOR_BGR2GRAY)

    # cv2.resize takes (width, height); the CRNN input is 32 high, 128 wide
    test_image = cv2.resize(test_image, (128, 32))

    # scale pixel values to [0, 1]
    test_image = test_image / 255

    # add batch and channel axes in one step: (32, 128) -> (1, 32, 128, 1)
    test_image = test_image.reshape(1, 32, 128, 1)

    preds = prediction_model.predict(test_image)
    pred_texts = ctc_decoder(preds)

    final_texts.append(pred_texts)

print(final_texts)

[['Mastercad'], ['MMONTHD'], ['CARDHOLDER'], ['WISA'], ['gADD'], ['YYEAR'], ['PBnG'], ['Bdgla'], ['MIsO'], ['Pebs'], ['CARID'], ['IduE'], ['CRIEDIT'], ['VALID'], ['Tupt']]


In [24]:
# Write each recognised string next to its box, offset 10 px up and to the
# left of the box's top-left corner. final_texts holds one single-item list
# per box, in the same order as fin_boxes.
for texts, box in zip(final_texts, fin_boxes):
    cv2.putText(img_copy,
                texts[0],
                (box[0] - 10, box[1] - 10),
                cv2.FONT_HERSHEY_SIMPLEX,
                1.2,
                (0, 255, 0),
                2)

cv2.imshow("Image", img_copy)
cv2.waitKey(0)
# Close the HighGUI window once a key is pressed; otherwise it lingers as an
# unresponsive window when the cell is run from a notebook kernel.
cv2.destroyAllWindows()

-1