Mount Google Drive


In [40]:
# Mount the user's Google Drive inside the Colab VM so the project files are reachable.
from google.colab import drive
drive.mount('/content/drive')

# Make the project folder the working directory; later cells use relative paths
# such as "dataset.csv", "hdr/..." and "normal/...".
%cd /content/drive/MyDrive/license_plate_ocr/
# List the folder contents as a quick sanity check.
!ls

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/license_plate_ocr
5cnn_model.h5	       8cnn_128lstm_model.h5  hdr.zip	  output.csv
7cnn_128lstm_model.h5  dataset.csv	      normal	  Untitled.ipynb
7cnn_model.h5	       hdr		      normal.zip


Import libraries

In [41]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import cv2

from pathlib import Path
from collections import Counter

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

from sklearn import preprocessing
from sklearn.model_selection import train_test_split

Load Dataset

In [69]:
# dataset.csv has no header row; the two columns are named here.
# dtype=str keeps labels as text even when a plate consists only of digits
# (otherwise pandas would parse it as an integer and len() below would fail).
dataset = pd.read_csv("dataset.csv", names=["images", "labels"], dtype=str)

# File names containing "crop_h" live under hdr/, everything else under normal/.
dataset["images"] = dataset["images"].apply(
    lambda s: "hdr/" + s if "crop_h" in s else "normal/" + s
)
dataset["type"] = dataset["images"].apply(
    lambda s: "hd" if "crop_h" in s else "normal"
)

images = dataset["images"]

# Right-pad every label with "#" up to the longest label so all targets share
# one length. If any label is shorter, "#" becomes part of the vocabulary below.
max_length = max(len(label) for label in dataset["labels"])
dataset["labels"] = dataset["labels"].str.ljust(max_length, "#")

labels = dataset["labels"]

# Sorted so the character -> index mapping is identical on every run; iterating
# a plain set of strings can yield a different order per interpreter session,
# which would make a previously saved model impossible to decode consistently.
characters = sorted(set(char for label in labels for char in label))


batch_size = 16

# Target image size (pixels) used when resizing and when building the model input.
img_width = 200
img_height = 50

# NOTE(review): not referenced by the cells visible here, and the model's own
# comment states a 16x downsampling — confirm whether this constant is still needed.
downsample_factor = 4





Preprocessing

In [70]:
# Both lookups are instances of the same Keras preprocessing layer.
_StringLookup = layers.experimental.preprocessing.StringLookup

# Forward table: single character -> integer id (no OOV bucket, no mask token).
char_to_num = _StringLookup(
    vocabulary=list(characters),
    num_oov_indices=0,
    mask_token=None,
)

# Reverse table: integer id -> character, built from the forward table's own
# vocabulary so the two stay in sync.
num_to_char = _StringLookup(
    vocabulary=char_to_num.get_vocabulary(),
    mask_token=None,
    invert=True,
)


# Splitting data into training and validation sets
#X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.20, random_state=42)
#X_train, X_valid, y_train, y_valid = train_test_split( X_train,y_train ,test_size=0.10, random_state=42)

def split_data(images, labels, train_size=0.8, shuffle=True, seed=None):
    """Split paired samples into a training part and a validation part.

    Args:
        images: Sequence of samples (list, NumPy array, pandas Series, ...).
        labels: Sequence of targets, same length as ``images``.
        train_size: Fraction of the data, between 0 and 1, placed in the
            training part. The count is truncated with ``int()``.
        shuffle: When True the samples are shuffled before splitting.
        seed: Optional seed for a reproducible shuffle. When None, NumPy's
            global random state is used, so ``np.random.seed`` still applies.

    Returns:
        Tuple ``(x_train, x_valid, y_train, y_valid)`` of NumPy arrays.

    Raises:
        ValueError: If the inputs differ in length or ``train_size`` is
            outside ``[0, 1]``.
    """
    # Convert up front so positional fancy-indexing works for any sequence type.
    images = np.asarray(images)
    labels = np.asarray(labels)

    size = len(images)
    if len(labels) != size:
        raise ValueError(
            f"images and labels differ in length: {size} vs {len(labels)}"
        )
    if not 0.0 <= train_size <= 1.0:
        raise ValueError(f"train_size must be within [0, 1], got {train_size}")

    indices = np.arange(size)
    if shuffle:
        if seed is None:
            np.random.shuffle(indices)
        else:
            # Dedicated generator: reproducible without touching global state.
            np.random.default_rng(seed).shuffle(indices)

    train_samples = int(size * train_size)
    train_idx = indices[:train_samples]
    valid_idx = indices[train_samples:]

    # The same index arrays are applied to both inputs so pairs stay aligned.
    return images[train_idx], images[valid_idx], labels[train_idx], labels[valid_idx]


# Splitting data into training and validation sets.
# split_data shuffles through NumPy's global random state; seed it first so the
# same samples land in each part (and the accuracy below is comparable) on every run.
np.random.seed(42)
x_train, x_valid, y_train, y_valid = split_data(np.array(images), np.array(labels))



def encode_single_sample(img_path, label):
    """Turn one (image path, label string) pair into model-ready tensors.

    The image is read as a single-channel PNG, converted to float32, resized
    to ``img_height`` x ``img_width`` and transposed so that the first axis is
    the image width. The label is split into characters and mapped to integer
    ids with ``char_to_num``.

    Returns:
        Dict with keys ``"image"``, ``"label"`` and ``"path"`` (the unchanged
        input path, carried along so predictions can be traced to files).
    """
    raw_bytes = tf.io.read_file(img_path)

    # Decode as grayscale, then rescale to floats.
    pixels = tf.io.decode_png(raw_bytes, channels=1)
    pixels = tf.image.convert_image_dtype(pixels, tf.float32)
    pixels = tf.image.resize(pixels, [img_height, img_width])

    # Swap height and width: the width axis becomes the leading (time) axis.
    pixels = tf.transpose(pixels, perm=[1, 0, 2])

    label_chars = tf.strings.unicode_split(label, input_encoding="UTF-8")
    label_ids = char_to_num(label_chars)

    return {"image": pixels, "label": label_ids, "path": img_path}

Create training and validation datasets

In [71]:

def _make_batched_dataset(paths, texts):
    """Build the tf.data pipeline shared by the training and validation sets.

    Each (path, text) pair is passed through ``encode_single_sample``, then the
    samples are grouped into batches of ``batch_size`` and prefetched.
    """
    autotune = tf.data.experimental.AUTOTUNE
    pipeline = tf.data.Dataset.from_tensor_slices((paths, texts))
    pipeline = pipeline.map(encode_single_sample, num_parallel_calls=autotune)
    pipeline = pipeline.batch(batch_size)
    return pipeline.prefetch(buffer_size=autotune)


train_dataset = _make_batched_dataset(x_train, y_train)
validation_dataset = _make_batched_dataset(x_valid, y_valid)


Build model

In [72]:
class CTCLayer(layers.Layer):
    """Pass-through layer that attaches the CTC loss to the model.

    ``call`` computes ``keras.backend.ctc_batch_cost`` between the label tensor
    and the per-timestep predictions, registers it through ``add_loss`` and
    returns the predictions unchanged.
    """

    def __init__(self, name=None, **kwargs):
        # Extra keyword arguments are forwarded to the base Layer constructor.
        super().__init__(name=name, **kwargs)
        self.loss_fn = keras.backend.ctc_batch_cost

    def call(self, y_true, y_pred):
        """Register the CTC loss for this batch and return ``y_pred`` as is.

        Args:
            y_true: Label ids; axis 0 is the batch, axis 1 the label length.
            y_pred: Predictions; axis 0 is the batch, axis 1 the time steps.
        """
        batch_len = tf.cast(tf.shape(y_true)[0], dtype="int64")
        input_length = tf.cast(tf.shape(y_pred)[1], dtype="int64")
        label_length = tf.cast(tf.shape(y_true)[1], dtype="int64")

        # ctc_batch_cost takes one length per sample, so the scalar lengths
        # are broadcast to (batch, 1) columns.
        input_length = input_length * tf.ones(shape=(batch_len, 1), dtype="int64")
        label_length = label_length * tf.ones(shape=(batch_len, 1), dtype="int64")

        loss = self.loss_fn(y_true, y_pred, input_length, label_length)
        self.add_loss(loss)

        return y_pred


def build_model():
    """Build and compile the CNN + bidirectional-LSTM OCR model with a CTC loss.

    Layout: seven 3x3 convolutions (64 filters each) interleaved with four 2x2
    max-pooling layers, a reshape into a sequence along the image width, a
    dense layer with dropout, two bidirectional LSTMs, a softmax over
    ``len(characters) + 1`` classes and finally ``CTCLayer``, which registers
    the loss.

    Reads the module-level ``img_width``, ``img_height`` and ``characters``.

    Returns:
        A compiled ``keras.Model`` named ``"ocr_model_v1"`` with inputs
        ``"image"`` and ``"label"``. It is compiled with Adam and no explicit
        loss, because the loss is added inside ``CTCLayer``.
    """
    n_filters = 64
    n_pools = 4
    downsample = 2 ** n_pools  # four 2x2 poolings shrink each spatial axis 16x

    def conv3x3(tensor, layer_name):
        # Every convolution in this model uses the same hyper-parameters.
        return layers.Conv2D(
            n_filters,
            (3, 3),
            activation="relu",
            kernel_initializer="he_normal",
            padding="same",
            name=layer_name,
        )(tensor)

    # Inputs to the model
    input_img = layers.Input(
        shape=(img_width, img_height, 1), name="image", dtype="float32"
    )
    labels = layers.Input(name="label", shape=(None,), dtype="float32")

    # First and second conv blocks
    x = conv3x3(input_img, "Conv1")
    x = conv3x3(x, "Conv2")
    x = layers.MaxPooling2D((2, 2), name="pool1")(x)

    # Third and fourth conv blocks
    x = conv3x3(x, "Conv3")
    x = conv3x3(x, "Conv4")
    x = layers.MaxPooling2D((2, 2), name="pool2")(x)

    # NOTE(review): the original code instantiated BatchNormalization layers
    # after pool2 and pool4 but never applied them to `x`, so they were not
    # part of the network. The dead objects are removed; the architecture is
    # unchanged. Confirm whether batch normalization was actually intended.

    # Fifth and sixth conv blocks
    x = conv3x3(x, "Conv5")
    x = conv3x3(x, "Conv6")
    x = layers.MaxPooling2D((2, 2), name="pool3")(x)

    # Seventh conv block
    x = conv3x3(x, "Conv7")
    x = layers.MaxPooling2D((2, 2), name="pool4")(x)

    # Collapse (width, height, filters) into (width, height * filters) so the
    # width axis serves as the sequence axis for the RNN part of the model.
    new_shape = ((img_width // downsample), (img_height // downsample) * n_filters)
    x = layers.Reshape(target_shape=new_shape, name="reshape")(x)
    x = layers.Dense(64, activation="relu", name="dense1")(x)
    x = layers.Dropout(0.2)(x)

    # RNNs
    x = layers.Bidirectional(layers.LSTM(128, return_sequences=True, dropout=0.25))(x)
    x = layers.Bidirectional(layers.LSTM(128, return_sequences=True, dropout=0.25))(x)

    # Output layer: one class per character plus one extra class, which CTC
    # uses as its blank symbol.
    x = layers.Dense(len(characters) + 1, activation="softmax", name="dense2")(x)

    # Add CTC layer for calculating CTC loss at each step
    output = CTCLayer(name="ctc_loss")(labels, x)

    # Define the model
    model = keras.models.Model(
        inputs=[input_img, labels], outputs=output, name="ocr_model_v1"
    )
    # Compile with Adam; no loss argument because CTCLayer calls add_loss().
    opt = keras.optimizers.Adam()
    model.compile(optimizer=opt)
    return model


# Get the model
model = build_model()
model.summary()

Tensor("ctc_loss/Shape:0", shape=(2,), dtype=int32)
Model: "ocr_model_v1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
image (InputLayer)              [(None, 200, 50, 1)] 0                                            
__________________________________________________________________________________________________
Conv1 (Conv2D)                  (None, 200, 50, 64)  640         image[0][0]                      
__________________________________________________________________________________________________
Conv2 (Conv2D)                  (None, 200, 50, 64)  36928       Conv1[0][0]                      
__________________________________________________________________________________________________
pool1 (MaxPooling2D)            (None, 100, 25, 64)  0           Conv2[0][0]                      
___________________________________

Training

In [73]:
# Upper bound on training length; early stopping normally ends the run sooner.
epochs = 100
early_stopping_patience = 10

# Stop once val_loss has not improved for `early_stopping_patience` epochs and
# roll the model back to the weights of the best epoch.
early_stopping = keras.callbacks.EarlyStopping(
    monitor="val_loss",
    patience=early_stopping_patience,
    restore_best_weights=True,
)

# Fit on the training pipeline, evaluating on the validation pipeline each epoch.
history = model.fit(
    train_dataset,
    epochs=epochs,
    validation_data=validation_dataset,
    callbacks=[early_stopping],
)

Epoch 1/100


  [n for n in tensors.keys() if n not in ref_input_names])


Tensor("ocr_model_v1/ctc_loss/Shape:0", shape=(2,), dtype=int32)
Tensor("ocr_model_v1/ctc_loss/Shape:0", shape=(2,), dtype=int32)
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100


Predictions on validation data

In [80]:
# Inference-only model: reuses the trained layers from the "image" input up to
# the "dense2" softmax output, leaving out the label input and the CTC layer.
prediction_model = keras.models.Model(
    inputs=model.get_layer(name="image").input,
    outputs=model.get_layer(name="dense2").output,
)

prediction_model.summary()

# A utility function to decode the output of the network
def decode_batch_predictions(pred):
    """Decode a batch of per-timestep softmax outputs into text.

    Uses greedy CTC decoding, keeps at most ``max_length`` ids per sample and
    maps the ids back to characters with ``num_to_char``.

    Args:
        pred: Array of predictions; axis 0 is the batch, axis 1 the time steps.

    Returns:
        List with one decoded string per sample in the batch.
    """
    # Every sample uses the full number of time steps as its input length.
    input_len = np.ones(pred.shape[0]) * pred.shape[1]

    # Use greedy search. For complex tasks, you can use beam search
    decoded = keras.backend.ctc_decode(pred, input_length=input_len, greedy=True)[0][0]
    decoded = decoded[:, :max_length]

    output_text = []
    for sequence in decoded:
        # ctc_decode fills unused positions with -1. Drop them before the
        # lookup so no out-of-vocabulary placeholder ends up in the text.
        sequence = tf.boolean_mask(sequence, sequence >= 0)
        text = tf.strings.reduce_join(num_to_char(sequence)).numpy().decode("utf-8")
        output_text.append(text)
    return output_text

# Parallel lists, one entry per validation sample.
predictions = []
orig_texts = []
image_paths = []

for batch in validation_dataset:
    batch_images = batch["image"]
    batch_labels = batch["label"]
    batch_paths = batch["path"]

    # The data is already batched, so run each batch directly instead of
    # calling predict(), which sets up its own batching loop on every call.
    preds = prediction_model.predict_on_batch(batch_images)
    pred_texts = decode_batch_predictions(preds)
    # extend() appends in place instead of rebuilding the list every batch.
    predictions.extend(pred_texts)

    # Map the ground-truth ids back to text for comparison.
    for label in batch_labels:
        label = tf.strings.reduce_join(num_to_char(label)).numpy().decode("utf-8")
        orig_texts.append(label)

    # Each path is a scalar string tensor; decode its bytes to str.
    for path in batch_paths:
        image_paths.append(path.numpy().decode("utf-8"))

Model: "model_10"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
image (InputLayer)           [(None, 200, 50, 1)]      0         
_________________________________________________________________
Conv1 (Conv2D)               (None, 200, 50, 64)       640       
_________________________________________________________________
Conv2 (Conv2D)               (None, 200, 50, 64)       36928     
_________________________________________________________________
pool1 (MaxPooling2D)         (None, 100, 25, 64)       0         
_________________________________________________________________
Conv3 (Conv2D)               (None, 100, 25, 64)       36928     
_________________________________________________________________
Conv4 (Conv2D)               (None, 100, 25, 64)       36928     
_________________________________________________________________
pool2 (MaxPooling2D)         (None, 50, 12, 64)        0  

Save model

In [82]:
# Save the inference-only model (image input -> "dense2" softmax output) as an .h5 file.
# The character lookups (char_to_num / num_to_char) are not layers of this model,
# so the vocabulary has to be kept separately to decode predictions after reloading.
# NOTE(review): a file with this name already appears in the project folder listing
# at the top of the notebook — confirm that overwriting it is intended.
prediction_model.save("7cnn_128lstm_model.h5")

Measure accuracy

In [83]:
# Collect predictions, ground truth and source paths in one table, removing
# the "[UNK]" placeholder from both text columns, then write it to disk.
output_df = pd.DataFrame(
    {
        "predicted_text": [text.replace("[UNK]", "") for text in predictions],
        "original_text": [text.replace("[UNK]", "") for text in orig_texts],
        "img_paths": image_paths,
    }
)
output_df.to_csv("output.csv")

In [84]:
# Exact-match accuracy: count rows where the whole predicted string equals the
# ground-truth string (True = correct, False = at least one character differs).
pd.DataFrame(output_df["predicted_text"]==output_df["original_text"]).value_counts()

True     109
False     22
dtype: int64

In [86]:
# Display the prediction table for manual inspection of the mismatches.
output_df

Unnamed: 0,predicted_text,original_text,img_paths
0,5B82908,5B82908,hdr/crop_h2/I00077.png
1,2Z5672,5M56740,normal/crop_m4/I00027.png
2,5B1149,5B11149,normal/crop_m2/I00067.png
3,7B11607,7B11607,hdr/crop_h3/I00040.png
4,7B29430,7B29430,normal/crop_m2/I00043.png
...,...,...,...
126,7B58307,7B58307,normal/crop_m2/I00052.png
127,8B71401,8B71401,normal/crop_m2/I00020.png
128,8B86266,8B86266,hdr/crop_h3/I00110.png
129,9B78701,9B78701,hdr/crop_h4/I00053.png
