In [1]:
import os 
import cv2 
import numpy as np 
import pandas as pd 
import tensorflow as tf 
import pytesseract as pt 
import plotly.express as px 
import matplotlib.pyplot as plt

import xml.etree.ElementTree as xet
from glob import glob 
from skimage import io 
from shutil import copy 

from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import TensorBoard
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import InceptionResNetV2

from tensorflow.keras.layers import Dense,Dropout,Flatten,Input
from tensorflow.keras.preprocessing.image import load_img,img_to_array



2024-04-09 19:12:25.821583: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-04-09 19:12:25.821685: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-04-09 19:12:25.950238: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
# Gather every annotation XML and collect its bounding box, one list per column.
path = glob('/kaggle/input/car-plate-detection/annotations/*.xml')
labels_dict = {'filepath': [], 'xmin': [], 'xmax': [], 'ymin': [], 'ymax': []}

for filename in path:
    # Only the first <object> of each annotation is used.
    bndbox = xet.parse(filename).getroot().find('object').find('bndbox')
    # Parse all four coordinates before appending anything.
    box = [int(bndbox.find(tag).text) for tag in ('xmin', 'xmax', 'ymin', 'ymax')]

    labels_dict['filepath'].append(filename)
    for tag, value in zip(('xmin', 'xmax', 'ymin', 'ymax'), box):
        labels_dict[tag].append(value)

In [3]:
# Tabulate the parsed annotations, write them to labels.csv, and preview the top rows.
# Note: 'filepath' holds the annotation XML path, not the image path.
df = pd.DataFrame.from_dict(labels_dict)
df.to_csv(path_or_buf='labels.csv', index=False)
df.head(5)

Unnamed: 0,filepath,xmin,xmax,ymin,ymax
0,/kaggle/input/car-plate-detection/annotations/...,209,283,135,169
1,/kaggle/input/car-plate-detection/annotations/...,191,242,147,169
2,/kaggle/input/car-plate-detection/annotations/...,115,277,115,153
3,/kaggle/input/car-plate-detection/annotations/...,36,62,175,186
4,/kaggle/input/car-plate-detection/annotations/...,71,215,205,246


In [4]:
# Take the first annotation file as a sample input for the helper defined below.
filename = df['filepath'][0]
def getFilename(filename, image_dir='/kaggle/input/car-plate-detection/images'):
    """Return the path of the image that an annotation XML file refers to.

    Parameters
    ----------
    filename : str
        Path to an annotation XML file whose root has a ``<filename>`` child.
    image_dir : str, optional
        Directory containing the images. Defaults to the Kaggle dataset
        folder that was previously hard-coded here.

    Returns
    -------
    str
        ``image_dir`` joined with the text of the ``<filename>`` element.

    Raises
    ------
    AttributeError
        If the XML has no ``<filename>`` element (``find`` returns ``None``).
    """
    filename_image = xet.parse(filename).getroot().find('filename').text
    filepath_image = os.path.join(image_dir, filename_image)
    return filepath_image
# Quick check: resolve the sample annotation file to its image path.
getFilename(filename)

'/kaggle/input/car-plate-detection/images/Cars339.png'

In [5]:
# Resolve every annotation file to its image path, in the same order as df.
image_path = [getFilename(xml_file) for xml_file in df['filepath']]
image_path[:10]  # peek at the first ten entries

['/kaggle/input/car-plate-detection/images/Cars339.png',
 '/kaggle/input/car-plate-detection/images/Cars13.png',
 '/kaggle/input/car-plate-detection/images/Cars74.png',
 '/kaggle/input/car-plate-detection/images/Cars16.png',
 '/kaggle/input/car-plate-detection/images/Cars291.png',
 '/kaggle/input/car-plate-detection/images/Cars236.png',
 '/kaggle/input/car-plate-detection/images/Cars429.png',
 '/kaggle/input/car-plate-detection/images/Cars132.png',
 '/kaggle/input/car-plate-detection/images/Cars251.png',
 '/kaggle/input/car-plate-detection/images/Cars51.png']

In [6]:
# Sanity check: display one training image together with its annotated plate box.
sample_idx = 87  # arbitrary sample; which file this is depends on the glob order
file_path = image_path[sample_idx]
# The previous cv2.imread call was overwritten immediately, so only this read is kept.
img = io.imread(file_path)
# Read the box from the annotation table (same row order as image_path) so it
# always belongs to the image being shown instead of relying on fixed numbers.
x0, x1, y0, y1 = (int(v) for v in df[['xmin', 'xmax', 'ymin', 'ymax']].iloc[sample_idx])
fig = px.imshow(img)
fig.update_layout(width=600, height=500, margin=dict(l=10, r=10, b=10, t=10),
                  xaxis_title=f'Figure 1 - {os.path.basename(file_path)} with bounding box')
fig.add_shape(type='rect', x0=x0, x1=x1, y0=y0, y1=y1, xref='x', yref='y', line_color='cyan')

In [7]:
# Data read: build the model inputs (224x224 images scaled to [0, 1]) and the
# targets (boxes expressed as fractions of each image's original width/height).
labels = df.iloc[:, 1:].values  # every column after 'filepath': xmin, xmax, ymin, ymax
data = []
output = []

for image, (xmin, xmax, ymin, ymax) in zip(image_path, labels):
    img_arr = cv2.imread(image)
    if img_arr is None:
        # cv2.imread reports a missing/unreadable file by returning None
        # rather than raising, so fail here with a message naming the file.
        raise OSError(f'cv2 could not read image: {image}')
    h, w = img_arr.shape[:2]  # original size, needed to normalise the box

    # Preprocessing: resize to the network input size and scale pixels to [0, 1]
    load_image = load_img(image, target_size=(224, 224))
    load_image_arr = img_to_array(load_image)
    norm_load_image_arr = load_image_arr / 255.0

    # Normalise the labels: x values by the width, y values by the height
    label_norm = (xmin / w, xmax / w, ymin / h, ymax / h)

    data.append(norm_load_image_arr)
    output.append(label_norm)
            

In [8]:
# Stack the per-image lists into float32 arrays ready for training.
X = np.asarray(data, dtype=np.float32)
y = np.asarray(output, dtype=np.float32)

In [9]:
# Hold out 20% of the samples; the fixed random_state keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, random_state=0, test_size=0.2
)
(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

((346, 224, 224, 3), (87, 224, 224, 3), (346, 4), (87, 4))

# Deep learning for object detection.
**Inception-Resnet-v2 model**


Inception-ResNet-v2 is a convolutional neural network that is trained on more than a million images from the ImageNet database. The network is 164 layers deep and can classify images into 1000 object categories, such as keyboard, mouse, pencil, and many animals. As a result, the network has learned rich feature representations for a wide range of images. Inception-ResNet-v2 was originally built for image classification; here we reuse its learned features to predict bounding-box coordinates.

Here we will use the Inception-ResNet-v2 model with pre-trained weights and fine-tune it on our data. We have already imported the necessary libraries from TensorFlow, so let's continue.

In [10]:
# ImageNet-pretrained backbone without its classification top, fed 224x224 RGB input.
inception_resnet = InceptionResNetV2(
    weights="imagenet",
    include_top=False,
    input_tensor=Input(shape=(224, 224, 3)),
)

# Regression head: flatten the backbone features, then 500 -> 250 -> 4 units.
# The final sigmoid keeps the four outputs in (0, 1), like the normalised labels.
headmodel = Flatten()(inception_resnet.output)
for units, activation in ((500, 'relu'), (250, 'relu'), (4, 'sigmoid')):
    headmodel = Dense(units, activation=activation)(headmodel)

model = Model(inputs=inception_resnet.input, outputs=headmodel)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_resnet_v2/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m219055592/219055592[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 0us/step


In [11]:
# Compile for coordinate regression: mean squared error, Adam with learning rate 1e-4.
adam = tf.keras.optimizers.Adam(learning_rate=1e-4)
model.compile(optimizer=adam, loss='mse')
model.summary()

# Inception-ResNet-v2 training and saving

In [12]:
# Log training to the 'object_detection' TensorBoard directory.
tfb = TensorBoard('object_detection')
# Note: the held-out test split is also used as the validation set here.
history = model.fit(
    x_train,
    y_train,
    batch_size=10,
    epochs=180,
    validation_data=(x_test, y_test),
    callbacks=[tfb],
)

Epoch 1/180


I0000 00:00:1712690194.354978      75 device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
W0000 00:00:1712690194.647623      75 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3s/step - loss: 0.0625   

W0000 00:00:1712690297.008897      75 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update
W0000 00:00:1712690309.816199      75 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m325s[0m 4s/step - loss: 0.0619 - val_loss: 0.0296
Epoch 2/180


W0000 00:00:1712690322.582771      75 graph_launch.cc:671] Fallback to op-by-op mode because memset node breaks graph update


[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 231ms/step - loss: 0.0135 - val_loss: 0.0176
Epoch 3/180
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 232ms/step - loss: 0.0073 - val_loss: 0.0156
Epoch 4/180
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 229ms/step - loss: 0.0069 - val_loss: 0.0137
Epoch 5/180
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 228ms/step - loss: 0.0055 - val_loss: 0.0136
Epoch 6/180
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 226ms/step - loss: 0.0037 - val_loss: 0.0244
Epoch 7/180
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 224ms/step - loss: 0.0040 - val_loss: 0.0152
Epoch 8/180
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 223ms/step - loss: 0.0025 - val_loss: 0.0129
Epoch 9/180
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 224ms/step - loss: 0.0029 - val_loss: 0.0137
Epoch 10/180
[1m35/35[0m [32m━━━━━━━━━━━━

In [13]:
# Persist the trained model to object_detection.h5 in the working directory.
model.save("object_detection.h5")

Loading our test picture with the right path.

In [14]:
path='/kaggle/input/number-plate-detection/TEST/TEST.jpeg'

# Full-resolution copy as an 8 bit array (0,255), kept for display and drawing
image=np.array(load_img(path),dtype=np.uint8)

# 224x224 copy with pixel values scaled to [0, 1], used as the network input
image1=load_img(path,target_size=(224,224))
image_arr_224=img_to_array(image1)/255.0

# Size of the original image (needed later to de-normalise the prediction)
h,w,d=image.shape
print('Height of the image=',h)
print('width of the image=',w)

Height of the image= 729
width of the image= 901


In [15]:
# Display the full-resolution test image before any box is drawn on it.
fig = px.imshow(image)
fig.update_layout(width=700, height=500,  margin=dict(l=10, r=10, b=10, t=10), xaxis_title='Figure 13 - TEST Image')

In [16]:
# Shape of the resized network input: (height, width, channels)
image_arr_224.shape

(224, 224, 3)

However, the model expects a batch of images, so we need to add a leading (fourth) dimension to the data. That dimension is the number of images in the batch; here it is 1 because we are passing a single image.

In [17]:
# Add a leading batch axis of size 1 so the single image matches the model's input layout.
test_arr=image_arr_224.reshape(1,224,224,3)
test_arr.shape

(1, 224, 224, 3)

**DE-Normalize the output**

In [18]:
# Make the prediction: one row of four values in the training label order
# (xmin, xmax, ymin, ymax), each normalised by the image width or height.
coords=model.predict(test_arr)
coords

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 17s/step


array([[0.3842708 , 0.6329274 , 0.6610499 , 0.76631975]], dtype=float32)

The model returns normalized coordinates. During training we divided the original pixel coordinates by the image width and height, so we now reverse that step and de-normalize the predictions back to pixel values.

In [19]:
# Denormalize the values: scale the x coordinates by the original image width
# and the y coordinates by its height, matching the (xmin, xmax, ymin, ymax) order.
# NOTE(review): this rebinds `coords`, so running the cell a second time would
# scale the already de-normalised values again; re-run the prediction cell first.
denorm=np.array([w,w,h,h])
coords=coords*denorm
coords

array([[346.2279793 , 570.2676034 , 481.90537888, 558.64709902]])

Now we will draw the bounding box on top of the image. A rectangle only needs its two diagonal corner points, so let's build them from the coordinates and draw the box.

In [20]:
# Pixel positions must be integers; astype(np.int32) truncates the fractional part.
coords=coords.astype(np.int32)
coords

array([[346, 570, 481, 558]], dtype=int32)

In [21]:
# Build the two diagonal corners of the box from the predicted coordinates
xmin, xmax, ymin, ymax = coords[0]
pt1, pt2 = (xmin, ymin), (xmax, ymax)
print(pt1, pt2)

(346, 481) (570, 558)


In [22]:
# cv2.rectangle draws straight into `image` (green outline, thickness 3), so the
# array loaded earlier is modified in place and every re-run draws on it again.
cv2.rectangle(image,pt1,pt2,(0,255,0),3)
fig = px.imshow(image)
fig.update_layout(width=700, height=500, margin=dict(l=10, r=10, b=10, t=10))

Now let's put it all together in one function and visualize the result at the end. The function returns the annotated image and the coordinates of the bounding box.

In [23]:
# Create pipeline
# Use the same absolute location as the single-image walkthrough so this cell
# does not depend on the kernel's current working directory.
path = '/kaggle/input/number-plate-detection/TEST/TEST.jpeg'
def object_detection(path):
    """Predict the number-plate box for one image and draw it on the image.

    Uses the notebook-level ``model`` for the prediction and prints the two
    corner points of the box.

    Parameters
    ----------
    path : str
        Path of the image file to analyse.

    Returns
    -------
    image : numpy.ndarray
        Full-size uint8 image with the predicted box drawn in green.
    coords : numpy.ndarray
        int32 array holding one row ``[xmin, xmax, ymin, ymax]`` in pixels of
        the original image.
    """
    # Read image: full-size copy for drawing, 224x224 copy for the network
    image = np.array(load_img(path), dtype=np.uint8)  # 8 bit array (0,255)
    image1 = load_img(path, target_size=(224, 224))

    # Data preprocessing: scale to [0, 1] and add the batch axis
    image_arr_224 = img_to_array(image1) / 255.0
    h, w = image.shape[:2]
    test_arr = image_arr_224.reshape(1, 224, 224, 3)

    # Make predictions (normalised xmin, xmax, ymin, ymax)
    coords = model.predict(test_arr)

    # Denormalize the values back to pixels and truncate to integers
    denorm = np.array([w, w, h, h])
    coords = (coords * denorm).astype(np.int32)

    # Draw bounding box on top of the image. tolist() yields plain Python ints,
    # which keeps the printed points as "(x, y)" regardless of the NumPy
    # version and gives cv2.rectangle the native integer type it expects.
    xmin, xmax, ymin, ymax = coords[0].tolist()
    pt1 = (xmin, ymin)
    pt2 = (xmax, ymax)
    print(pt1, pt2)
    cv2.rectangle(image, pt1, pt2, (0, 255, 0), 3)
    return image, coords

# Run the pipeline on the test image; `cods` holds the pixel box used for cropping later.
image, cods = object_detection(path)

# Show the annotated image returned by the pipeline.
fig = px.imshow(image)
fig.update_layout(width=700, height=500, margin=dict(l=10, r=10, b=10, t=10),xaxis_title='Figure 14')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
(346, 481) (570, 558)


In [24]:
# Crop the predicted plate region from a fresh copy of the image (no box drawn on it).
img = np.array(load_img(path))
xmin, xmax, ymin, ymax = cods[0]
rows, cols = slice(ymin, ymax), slice(xmin, xmax)  # rows index y, columns index x
roi = img[rows, cols]
fig = px.imshow(roi)
fig.update_layout(
    width=350,
    height=250,
    margin=dict(l=10, r=10, b=10, t=10),
    xaxis_title='Figure 15 Cropped image',
)

In [25]:
# Extract text from the cropped plate with Tesseract; the crop is passed as-is,
# with no extra preprocessing applied beforehand.
text = pt.image_to_string(roi)
print(text)

(L51 K 4995
—

