In [1]:
import os
import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
import pytesseract as pt
import plotly.express as px
import matplotlib.pyplot as plt
import xml.etree.ElementTree as xet

from glob import glob
from skimage import io
from shutil import copy
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import TensorBoard
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import InceptionResNetV2
from tensorflow.keras.layers import Dense, Dropout, Flatten, Input
from tensorflow.keras.preprocessing.image import load_img, img_to_array


In [2]:
# Print the version string of the TensorFlow package loaded in this kernel.
import tensorflow as tf
print(tf.__version__)

2.10.1


In [3]:
# Folder prefix that getFilename() joins with each XML <filename> value to build an image path.
image_asset_path = "images/"

In [8]:
# Collect one bounding box per XML annotation file into column-oriented lists.
# glob() returns matches in a filesystem-dependent order, so the result is
# sorted to keep the row order (and any later positional lookups such as
# "row 0") reproducible across machines and runs.
path = sorted(glob('../msc_anpr/images/*.xml'))
labels_dict = dict(filepath=[],xmin=[],xmax=[],ymin=[],ymax=[])
box_keys = ('xmin', 'xmax', 'ymin', 'ymax')
for filename in path:
    root = xet.parse(filename).getroot()
    # find() yields only the first <object>; additional objects in the same
    # file are not read.
    labels_info = root.find('object').find('bndbox')

    # Parse all four corners before appending anything so that the lists
    # never end up with different lengths if one value is missing/invalid.
    box = [int(labels_info.find(key).text) for key in box_keys]

    labels_dict['filepath'].append(filename)
    for key, value in zip(box_keys, box):
        labels_dict[key].append(value)

In [9]:
# Turn the collected lists into a table, save it as labels.csv (without the
# index column), and preview the first rows.
df = pd.DataFrame(data=labels_dict)
df.to_csv(path_or_buf='labels.csv', index=False)
df.head()

Unnamed: 0,filepath,xmin,xmax,ymin,ymax
0,../msc_anpr/images\N1.xml,1093,1396,645,727
1,../msc_anpr/images\N100.xml,134,301,312,350
2,../msc_anpr/images\N101.xml,31,139,128,161
3,../msc_anpr/images\N102.xml,164,316,216,243
4,../msc_anpr/images\N103.xml,813,1067,665,724


In [22]:

# parsing
def parsing(path, image_dir='number-plate-detection/images'):
    """Read the image file name and image size from one XML annotation file.

    Parameters
    ----------
    path : str
        Location of the XML annotation file to parse.
    image_dir : str, optional
        Folder prefix placed in front of the XML's <filename> text, joined
        with a single '/'. The default reproduces the folder that used to be
        hard-coded in this function.

    Returns
    -------
    tuple
        ``(filename, width, height)``: the prefixed image path, followed by
        the <size>/<width> and <size>/<height> values converted to int.

    Raises
    ------
    AttributeError
        If <filename>, <size>, <width> or <height> is absent (find() returns
        None in that case).
    ValueError
        If the width or height text is not an integer.
    """
    root = xet.parse(path).getroot()
    name = root.find('filename').text
    # Strip any trailing slash so callers may pass either 'dir' or 'dir/'.
    filename = f"{image_dir.rstrip('/')}/{name}"

    # width and height
    size = root.find('size')
    width = int(size.find('width').text)
    height = int(size.find('height').text)

    return filename, width, height
# Parse every annotation once and attach the results as three new columns.
# Building a single DataFrame from the list of tuples avoids creating one
# pandas Series per row (what `.apply(pd.Series)` does) and keeps the column
# names explicit even when there are no rows.
parsed = pd.DataFrame(
    df['filepath'].map(parsing).tolist(),
    columns=['filename', 'width', 'height'],
    index=df.index,
)
df[['filename', 'width', 'height']] = parsed
df.head()

Unnamed: 0,filepath,xmin,xmax,ymin,ymax,filename,width,height,center_x,center_y,bb_width,bb_height
0,../msc_anpr/images\N1.xml,1093,1396,645,727,number-plate-detection/images/N1.jpeg,1920,1080,0.648177,0.635185,0.157812,0.075926
1,../msc_anpr/images\N100.xml,134,301,312,350,number-plate-detection/images/N100.jpeg,450,417,0.483333,0.793765,0.371111,0.091127
2,../msc_anpr/images\N101.xml,31,139,128,161,number-plate-detection/images/N101.jpeg,249,239,0.341365,0.604603,0.433735,0.138075
3,../msc_anpr/images\N102.xml,164,316,216,243,number-plate-detection/images/N102.jpeg,478,395,0.502092,0.581013,0.317992,0.068354
4,../msc_anpr/images\N103.xml,813,1067,665,724,number-plate-detection/images/N103.jpeg,1800,1200,0.522222,0.57875,0.141111,0.049167


In [None]:
# Take the annotation file stored under index label 0 as a worked example.
filename = df['filepath'][0]
def getFilename(filename):
    """Return the image path that belongs to the XML annotation *filename*.

    The text of the root's <filename> element is joined onto the
    module-level ``image_asset_path`` with ``os.path.join``.
    """
    xml_root = xet.parse(filename).getroot()
    image_name = xml_root.find('filename').text
    return os.path.join(image_asset_path, image_name)
getFilename(filename)

In [23]:
# Box centre and box size, each divided by the matching image dimension
# (x values by 'width', y values by 'height').
df['center_x'] = df['xmax'].add(df['xmin']).div(df['width'].mul(2))
df['center_y'] = df['ymax'].add(df['ymin']).div(df['height'].mul(2))

df['bb_width'] = df['xmax'].sub(df['xmin']).div(df['width'])
df['bb_height'] = df['ymax'].sub(df['ymin']).div(df['height'])
print(df.head())

                      filepath  xmin  xmax  ymin  ymax  \
0    ../msc_anpr/images\N1.xml  1093  1396   645   727   
1  ../msc_anpr/images\N100.xml   134   301   312   350   
2  ../msc_anpr/images\N101.xml    31   139   128   161   
3  ../msc_anpr/images\N102.xml   164   316   216   243   
4  ../msc_anpr/images\N103.xml   813  1067   665   724   

                                  filename  width  height  center_x  center_y  \
0    number-plate-detection/images/N1.jpeg   1920    1080  0.648177  0.635185   
1  number-plate-detection/images/N100.jpeg    450     417  0.483333  0.793765   
2  number-plate-detection/images/N101.jpeg    249     239  0.341365  0.604603   
3  number-plate-detection/images/N102.jpeg    478     395  0.502092  0.581013   
4  number-plate-detection/images/N103.jpeg   1800    1200  0.522222  0.578750   

   bb_width  bb_height  
0  0.157812   0.075926  
1  0.371111   0.091127  
2  0.433735   0.138075  
3  0.317992   0.068354  
4  0.141111   0.049167  


In [28]:
# Output folders for the train/test split
import os

# Parent folder first, then the two split folders inside it
directories = ["data_images/", "data_images/test/", "data_images/train/"]

# exist_ok=True makes re-running this cell a no-op for folders already present
for target_dir in directories:
    os.makedirs(target_dir, exist_ok=True)

In [None]:
# Resolve every annotation file to the path of the image it names
image_path = df['filepath'].map(getFilename).tolist()
image_path[:10]  # preview the first ten paths

In [None]:
# Show the first image with its annotated bounding box drawn on top.
file_path = image_path[0]  # first entry of the image list
img = io.imread(file_path)  # read the image
# image_path is built from df['filepath'] in row order, so the first row of df
# holds the box for image_path[0]. Reading it from the table keeps the
# rectangle in sync with the data instead of relying on hard-coded pixels.
first_box = df.iloc[0]
fig = px.imshow(img)
# Name the file that is actually displayed; the previous literal said
# "N2.jpeg" although the image and the box are those of image_path[0].
fig.update_layout(
    width=1000,
    height=1000,
    margin=dict(l=10, r=10, b=10, t=10),
    xaxis_title=f'Figure 8 - {os.path.basename(file_path)} with bounding box',
)
# Kept as the last expression so the notebook renders the returned figure.
fig.add_shape(
    type='rect',
    x0=int(first_box['xmin']),
    x1=int(first_box['xmax']),
    y0=int(first_box['ymin']),
    y1=int(first_box['ymax']),
    xref='x',
    yref='y',
    line_color='cyan',
)