In [7]:
import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
import pytesseract as pt
import plotly.express as px
import matplotlib.pyplot as plt
import xml.etree.ElementTree as xet

from glob import glob
from skimage import io
from shutil import copy
import tensorflow.keras.backend as K
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import TensorBoard
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import InceptionResNetV2
from tensorflow.keras.layers import Dense, Flatten, Input
from tensorflow.keras.preprocessing.image import load_img, img_to_array

def load_and_extract_labels():
    """Collect one bounding box per XML annotation file into a DataFrame.

    Every ``*.xml`` file in the training folder is parsed; the ``<bndbox>`` of
    its first ``<object>`` element supplies the box corners. The resulting
    table is also written to ``labels.csv`` in the current working directory.

    Returns:
        pandas.DataFrame: columns ``filepath`` (path of the XML file),
        ``xmin``, ``xmax``, ``ymin``, ``ymax`` (integers read from the XML).
    """
    # glob() yields files in arbitrary, filesystem-dependent order; sorting keeps
    # the row order (and therefore the later seeded train/test split) stable.
    annotation_paths = sorted(glob('../input/labeled-licence-plates-dataset/dataset/train/*.xml'))
    labels_dict = dict(filepath=[], xmin=[], xmax=[], ymin=[], ymax=[])

    for filename in annotation_paths:
        root = xet.parse(filename).getroot()

        # Only the first annotated object is used. Annotations without an
        # <object>/<bndbox> carry no box to regress on, so they are skipped
        # instead of failing with an AttributeError on None.
        member_object = root.find('object')
        if member_object is None:
            continue
        labels_info = member_object.find('bndbox')
        if labels_info is None:
            continue

        labels_dict['filepath'].append(filename)
        for key in ('xmin', 'xmax', 'ymin', 'ymax'):
            labels_dict[key].append(int(labels_info.find(key).text))

    df = pd.DataFrame(labels_dict)
    df.to_csv('labels.csv', index=False)
    return df

def preprocess_data(df):
    """Load the annotated images and build normalised train/validation arrays.

    For each row of ``df`` the image named inside the annotation XML is resized
    to 224x224 and scaled to [0, 1]; its bounding box is divided by the
    original image width/height so the targets are relative coordinates.

    Args:
        df: DataFrame whose ``filepath`` column holds annotation XML paths and
            whose remaining columns are ``xmin, xmax, ymin, ymax`` in that
            order.

    Returns:
        tuple: ``(x_train, x_test, y_train, y_test)`` float32 arrays from an
        80/20 split with a fixed random seed. Each label row is
        ``(xmin, xmax, ymin, ymax)`` in relative coordinates.

    Raises:
        OSError: if an image referenced by an annotation cannot be read.
    """
    def get_filename(filename):
        # The XML stores only the image file name; resolve it against the dataset folder.
        filename_image = xet.parse(filename).getroot().find('filename').text
        filepath_image = os.path.join('../input/labeled-licence-plates-dataset/dataset/train', filename_image)
        return filepath_image

    image_path = list(df['filepath'].apply(get_filename))

    # Every column after 'filepath' is a box coordinate.
    labels = df.iloc[:, 1:].values
    data = []
    output = []

    for image, (xmin, xmax, ymin, ymax) in zip(image_path, labels):
        # cv2.imread signals failure by returning None rather than raising.
        img_arr = cv2.imread(image)
        if img_arr is None:
            raise OSError("Could not read image: {}".format(image))
        # Original size is needed to express the box in relative coordinates.
        h, w = img_arr.shape[:2]

        # Preprocessing: resize to the network input size and scale to [0, 1].
        load_image = load_img(image, target_size=(224, 224))
        norm_load_image_arr = img_to_array(load_image) / 255.0

        # Normalization to labels: x by width, y by height.
        label_norm = (xmin / w, xmax / w, ymin / h, ymax / h)

        data.append(norm_load_image_arr)
        output.append(label_norm)

    X = np.array(data, dtype=np.float32)
    y = np.array(output, dtype=np.float32)

    x_train, x_test, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=0)
    return x_train, x_test, y_train, y_test

def build_object_detection_model():
    """Assemble and compile the bounding-box regression network.

    An InceptionResNetV2 backbone (ImageNet weights, no classification top,
    224x224x3 input) is followed by a flatten layer, two ReLU dense layers
    (512 and 256 units) and a 4-unit sigmoid output, one value per box
    coordinate. The model is compiled with ``iou_loss`` and Adam (lr 1e-4).

    Returns:
        tensorflow.keras.Model: the compiled model.
    """
    backbone = InceptionResNetV2(
        weights="imagenet",
        include_top=False,
        input_tensor=Input(shape=(224, 224, 3)),
    )

    # Regression head stacked on the flattened backbone features.
    features = Flatten()(backbone.output)
    for units in (512, 256):
        features = Dense(units, activation="relu")(features)
    box_output = Dense(4, activation='sigmoid')(features)

    detector = Model(inputs=backbone.input, outputs=box_output)
    detector.compile(
        loss=iou_loss,
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
    )
    return detector

def train_object_detection_model(model, x_train, y_train, x_test, y_test):
    """Fit the detector and print the mean validation IoU after every epoch.

    Training runs for 150 epochs with batch size 10, logs to TensorBoard under
    ``object_detection`` and uses ``(x_test, y_test)`` as validation data.

    Args:
        model: compiled Keras model predicting ``(xmin, xmax, ymin, ymax)``.
        x_train, y_train: training images and box labels.
        x_test, y_test: validation images and box labels, with labels in the
            same ``(xmin, xmax, ymin, ymax)`` layout.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    tfb = TensorBoard('object_detection')

    def to_xywh(box):
        # Labels and predictions are (xmin, xmax, ymin, ymax), whereas
        # calculate_iou expects (x, y, width, height). Inverted predictions
        # are clamped to zero width/height.
        xmin, xmax, ymin, ymax = box
        return xmin, ymin, max(xmax - xmin, 0.0), max(ymax - ymin, 0.0)

    class IoUCallback(tf.keras.callbacks.Callback):
        def on_epoch_end(self, epoch, logs=None):
            pred_coords = model.predict(x_test)
            iou_list = [
                calculate_iou(to_xywh(true_coords), to_xywh(pred_coords[i]))
                for i, true_coords in enumerate(y_test)
            ]
            mean_iou = np.mean(iou_list)
            print("Mean IoU on Validation Set:", mean_iou)

    history = model.fit(x=x_train, y=y_train, batch_size=10, epochs=150,
                        validation_data=(x_test, y_test), callbacks=[tfb, IoUCallback()])

    return history

def object_detection(path, model, ground_truth=None):
    """Predict a bounding box for one image and draw it on the image.

    Args:
        path: path of the image file.
        model: trained model returning relative ``(xmin, xmax, ymin, ymax)``
            for a ``(1, 224, 224, 3)`` input scaled to [0, 1].
        ground_truth: optional reference box as ``(xmin, xmax, ymin, ymax)``
            in pixels of the original image. When given, the IoU between it
            and the prediction is printed.

    Returns:
        tuple: ``(image, coords)`` where ``image`` is the full-size uint8 image
        with the predicted box drawn in green, and ``coords`` is an int32
        array of shape ``(1, 4)`` holding ``(xmin, xmax, ymin, ymax)`` in pixels.
    """
    image = np.array(load_img(path), dtype=np.uint8)
    h, w = image.shape[:2]

    # Network input: resized copy scaled to [0, 1] with a leading batch axis.
    image_arr_224 = img_to_array(load_img(path, target_size=(224, 224))) / 255.0
    test_arr = image_arr_224.reshape(1, 224, 224, 3)

    coords = model.predict(test_arr)

    # Scale the relative prediction back to pixels (x by width, y by height).
    denorm = np.array([w, w, h, h])
    coords = (coords * denorm).astype(np.int32)

    # Plain Python ints: the points are handed to cv2.rectangle below.
    xmin, xmax, ymin, ymax = (int(value) for value in coords[0])

    if ground_truth is not None:
        gt_xmin, gt_xmax, gt_ymin, gt_ymax = ground_truth
        # calculate_iou works on (x, y, width, height) boxes.
        ground_truth_coords = [gt_xmin, gt_ymin, gt_xmax - gt_xmin, gt_ymax - gt_ymin]
        predicted_coords = [xmin, ymin, max(xmax - xmin, 0), max(ymax - ymin, 0)]
        iou = calculate_iou(ground_truth_coords, predicted_coords)
        print("IoU:", iou)

    pt1 = (xmin, ymin)
    pt2 = (xmax, ymax)
    print("Bounding Box Coordinates:", pt1, pt2)
    cv2.rectangle(image, pt1, pt2, (0, 255, 0), 3)

    return image, coords

def extract_text_from_image(image, cods):
    """Crop the detected region out of ``image`` and run Tesseract OCR on it.

    Args:
        image: image array indexed as ``[row, column, ...]``.
        cods: sequence whose first element is ``(xmin, xmax, ymin, ymax)`` in
            pixels.

    Returns:
        str: the recognised text, or ``''`` when the box selects no pixels.
    """
    xmin, xmax, ymin, ymax = cods[0]
    roi = image[ymin:ymax, xmin:xmax]

    # A zero-width/height or inverted box gives an empty crop; there is nothing
    # to recognise and OCR cannot process an empty image.
    if roi.size == 0:
        return ''

    return pt.image_to_string(roi)

def calculate_iou(box1, box2):
    """Return the intersection-over-union of two axis-aligned boxes.

    Args:
        box1: ``(x, y, width, height)`` of the first box.
        box2: ``(x, y, width, height)`` of the second box.

    Returns:
        The ratio intersection area / union area, or ``0.0`` when the union
        has no area (e.g. both boxes are degenerate).
    """
    x1, y1, w1, h1 = box1
    x2, y2, w2, h2 = box2

    # Overlap extent along each axis; clamped at zero for disjoint boxes.
    overlap_w = max(0, min(x1 + w1, x2 + w2) - max(x1, x2))
    overlap_h = max(0, min(y1 + h1, y2 + h2) - max(y1, y2))
    intersection_area = overlap_w * overlap_h

    union_area = w1 * h1 + w2 * h2 - intersection_area

    # Guard against dividing by zero when neither box covers any area.
    if union_area <= 0:
        return 0.0

    return intersection_area / union_area

def iou_loss(y_true, y_pred):
    """Keras loss: ``1 - IoU`` between true and predicted boxes.

    Args:
        y_true: tensor whose last axis is ``(xmin, xmax, ymin, ymax)``.
        y_pred: tensor with the same layout as ``y_true``.

    Returns:
        Tensor of ``1 - IoU`` values, one per box.
    """
    true_xmin, true_xmax, true_ymin, true_ymax = tf.unstack(y_true, axis=-1)
    pred_xmin, pred_xmax, pred_ymin, pred_ymax = tf.unstack(y_pred, axis=-1)

    # Corners of the overlap rectangle.
    xmin = K.maximum(true_xmin, pred_xmin)
    xmax = K.minimum(true_xmax, pred_xmax)
    ymin = K.maximum(true_ymin, pred_ymin)
    ymax = K.minimum(true_ymax, pred_ymax)

    intersection_area = K.maximum(0.0, xmax - xmin) * K.maximum(0.0, ymax - ymin)

    # The model does not enforce xmax >= xmin (or ymax >= ymin). Clamping each
    # side at zero keeps the areas non-negative, so the union cannot be
    # negative and ``union_area + epsilon`` stays strictly positive.
    true_area = K.maximum(0.0, true_xmax - true_xmin) * K.maximum(0.0, true_ymax - true_ymin)
    pred_area = K.maximum(0.0, pred_xmax - pred_xmin) * K.maximum(0.0, pred_ymax - pred_ymin)
    union_area = true_area + pred_area - intersection_area

    iou = intersection_area / (union_area + K.epsilon())

    return 1.0 - iou

In [8]:
# Parse the XML annotations into a DataFrame (the function also writes labels.csv).
df = load_and_extract_labels()

# Load and normalise images and boxes, then split 80/20 into train and validation arrays.
x_train, x_test, y_train, y_test = preprocess_data(df)

In [9]:
# Build and compile the box-regression network (InceptionResNetV2 backbone, IoU loss).
model = build_object_detection_model()

In [None]:
# Train the detector; x_test/y_test serve as the validation set during fitting.
history = train_object_detection_model(model, x_train, y_train, x_test, y_test)

In [None]:
# Detect the plate on a single test image.
test_image_path = '../input/labeled-licence-plates-dataset/dataset/test/104.jpg'
detected_image, bounding_box_coords = object_detection(test_image_path, model)

# Run OCR on a clean copy of the image: detected_image already has the green
# box drawn on it, and that border would end up inside the cropped plate.
clean_image = np.array(load_img(test_image_path))
extracted_text = extract_text_from_image(clean_image, bounding_box_coords)
print("Extracted Text:", extracted_text)

In [None]:
# Full image with the predicted bounding box drawn on it.
fig = px.imshow(detected_image)
fig.update_layout(width=700, height=500, margin=dict(l=10, r=10, b=10, t=10), xaxis_title='Figure 14')
fig.show()

# Plate region cropped from an unannotated copy of the image.
img = np.array(load_img(test_image_path))
xmin, xmax, ymin, ymax = bounding_box_coords[0]
roi = img[ymin:ymax, xmin:xmax]
fig = px.imshow(roi)
fig.update_layout(width=350, height=250, margin=dict(l=10, r=10, b=10, t=10), xaxis_title='Cropped image')
fig.show()