# Data Engineering Pipeline for License Plate Recognition

Here we will explore how to capture images from video streams and then run different object detection models on them to obtain the detected objects with varying confidence levels.

<b>Pre-requisites</b>: This notebook assumes that ffmpeg is installed on the container or host where it is running. If it is running in a container, make sure the container's Dockerfile includes the following command:

apt-get update -qq && apt-get install ffmpeg -y

In [1]:
!pip install numpy 
!pip install matplotlib
!pip uninstall -y opencv-python-headless
!pip uninstall -y opencv-python
!pip uninstall -y cv2
!pip uninstall -y pylabel
!pip install pylabel
!pip install opencv-python-headless
!pip install pytesseract
!pip install ffmpeg-python

You should consider upgrading via the '/usr/bin/python -m pip install --upgrade pip' command.[0m
You should consider upgrading via the '/usr/bin/python -m pip install --upgrade pip' command.[0m
Found existing installation: opencv-python-headless 4.9.0.80
Uninstalling opencv-python-headless-4.9.0.80:
  Successfully uninstalled opencv-python-headless-4.9.0.80
Found existing installation: opencv-python 4.9.0.80
Uninstalling opencv-python-4.9.0.80:
  Successfully uninstalled opencv-python-4.9.0.80
Found existing installation: pylabel 0.1.55
Uninstalling pylabel-0.1.55:
  Successfully uninstalled pylabel-0.1.55
Collecting pylabel
  Using cached pylabel-0.1.55-py3-none-any.whl (27 kB)
Collecting opencv-python
  Using cached opencv_python-4.9.0.80-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (62.2 MB)
Installing collected packages: opencv-python, pylabel
Successfully installed opencv-python-4.9.0.80 pylabel-0.1.55
You should consider upgrading via the '/usr/bin/python -m pip ins

In [2]:
import numpy as np
import matplotlib.pyplot as plt 
import cv2 as cv
from pylabel import importer
from data_pipeline import Pipeline

# Root folder of the license-plate data, plus the sub-folders handed to the pipeline
data_folder = "/workspace/shared-data/license-plates/"
annotations_folder = "annotations/"
images_folder = "../images/"

# Absolute path to the annotations: root folder + annotations sub-folder
path_to_annotations = data_folder + annotations_folder

# Path that leads from the annotations folder to the images
path_to_images = images_folder

pipeline = Pipeline(
    data_folder,
    "License_Plate_Dataset",
    "udp://127.0.0.1:23000",
    images_folder,
    annotations_folder,
)

In [None]:
from dataset import Object_Detection_Dataset

# Build the dataset object over the cropped ALPR image folder, configured with n_folds=10
dataset = Object_Detection_Dataset(data_folder='/workspace/shared-data/license-plates/alpr-images/cropped/',n_folds=10)
# Fetch the testing data; the argument 3 is presumably a fold index — TODO confirm against dataset.py
test_data = dataset.get_testing_dataset(3)

In [None]:
from model import Object_Detection_Model

# Both models are constructed with the same base_dir; only the cfg and weights files differ
alpr_base_dir = '/workspace/shared-data/license-plates/alpr-images/'

model1 = Object_Detection_Model(
    cfg_file="lpr-yolov3.cfg",
    weights_file='lpr-yolov3.weights',
    base_dir=alpr_base_dir,
)
model2 = Object_Detection_Model(
    cfg_file="lpr-yolov3-tiny.cfg",
    weights_file='lpr-yolov3-tiny.weights',
    base_dir=alpr_base_dir,
)

In [None]:
# Run both models on every test image and print each model's results.
# After the loop, results1/results2 remain bound to the results for the last image.
for img_file in test_data:
    print(img_file)
    results1 = model1.test(img_file, [])
    results2 = model2.test(img_file, [])
    print(results1, results2, sep="\n")

In [None]:
# Show the results left bound by the final iteration of the inference loop
print(results1)
print(results2)

In [None]:
# Load one cropped image from disk (cv.imread returns None when the file cannot be read)
original_img = cv.imread('/workspace/shared-data/license-plates/alpr-images/cropped/image12.jpeg')
# Resize to 416x416 — presumably the input size the detection models expect; TODO confirm against the cfg files
resized_img = cv.resize(original_img, (416, 416)) 
# OpenCV images are BGR; convert to RGB before displaying with matplotlib
plt.imshow(cv.cvtColor(resized_img, cv.COLOR_BGR2RGB))  


In [None]:
# Take the first element of the first two entries in results1; the crop cells use these as (x, y, width, height)
coords1 = results1[0][0]
# NOTE(review): this also reads results1 (entry 1), not results2 — confirm that a second model1 detection is intended
coords2 = results1[1][0]
# Re-display the resized image (BGR converted to RGB for matplotlib)
plt.imshow(cv.cvtColor(resized_img, cv.COLOR_BGR2RGB))  

In [None]:
# coords1 is used as (x, y, width, height): rows span y..y+height, columns span x..x+width
box_x, box_y, box_w, box_h = (coords1[i] for i in range(4))
cropped_image1 = resized_img[box_y:box_y + box_h, box_x:box_x + box_w]
plt.imshow(cv.cvtColor(cropped_image1, cv.COLOR_BGR2RGB))

In [None]:
# coords2 is used as (x, y, width, height): rows span y..y+height, columns span x..x+width
box_x, box_y, box_w, box_h = (coords2[i] for i in range(4))
cropped_image2 = resized_img[box_y:box_y + box_h, box_x:box_x + box_w]
plt.imshow(cv.cvtColor(cropped_image2, cv.COLOR_BGR2RGB))

In [3]:
# Run only the extract stage here; transform() and load() are called in a later cell
pipeline.extract()

1
/workspace/shared-data/license-plates/alpr-images/original/image1.jpeg
/workspace/shared-data/license-plates/alpr-images/original/image2.jpeg
/workspace/shared-data/license-plates/alpr-images/original/image3.jpeg
/workspace/shared-data/license-plates/alpr-images/original/image4.jpeg
/workspace/shared-data/license-plates/alpr-images/original/image5.jpeg
/workspace/shared-data/license-plates/alpr-images/original/image6.jpeg
/workspace/shared-data/license-plates/alpr-images/original/image7.jpeg
/workspace/shared-data/license-plates/alpr-images/original/image8.jpeg
/workspace/shared-data/license-plates/alpr-images/original/image9.jpeg
/workspace/shared-data/license-plates/alpr-images/original/image10.jpeg
/workspace/shared-data/license-plates/alpr-images/original/image11.jpeg
/workspace/shared-data/license-plates/alpr-images/original/image12.jpeg
/workspace/shared-data/license-plates/alpr-images/original/image13.jpeg
/workspace/shared-data/license-plates/alpr-images/original/image14.jpeg

In [None]:
# Load one of the original (uncropped) images
original_img = cv.imread('/workspace/shared-data/license-plates/alpr-images/original/image2.jpeg')
# Manual crop with hard-coded bounds: rows 1250-3000, columns 700-3000
cropped_image = original_img[1250:3000 ,700 :3000 ]
# OpenCV images are BGR; convert to RGB before displaying with matplotlib
plt.imshow(cv.cvtColor(cropped_image, cv.COLOR_BGR2RGB))  

In [None]:
# Run the transform stage, then the load stage; load() returns a collection that the next cell indexes into
pipeline.transform()
cropped_img_list = pipeline.load()

In [None]:
# Display entry 17 of the list returned by pipeline.load() (BGR converted to RGB for matplotlib)
plt.imshow(cv.cvtColor(cropped_img_list[17], cv.COLOR_BGR2RGB))  

In [None]:
# Load a single image and display it uncropped (the manual crop below is intentionally disabled)
img_test = cv.imread('/workspace/shared-data/license-plates/alpr-images/image470.jpeg')
#cropped_image = img_test[1250:3000 ,700 :3000 ]
plt.imshow(cv.cvtColor(img_test, cv.COLOR_BGR2RGB))  

In [None]:
# Convert the Pascal VOC annotations of two individual images to COCO format
pipeline.convert_pascal_to_coco_format("N2.jpeg")
pipeline.convert_pascal_to_coco_format("N5.jpeg")
# Called with no file name — presumably converts the whole dataset; TODO confirm in data_pipeline.py
pipeline.convert_pascal_to_coco_format()

In [None]:
# Import the Pascal VOC annotations with pylabel; this rebinds `dataset`, replacing the Object_Detection_Dataset instance
dataset = importer.ImportVOC(path=path_to_annotations, path_to_images=path_to_images, name="License_Plate_Dataset")
# Preview the first five annotation rows
dataset.df.head(5)

In [None]:
from IPython.display import display
# Render bounding boxes twice: once selecting the image by numeric id, once by file name
display(dataset.visualize.ShowBoundingBoxes(1))
display(dataset.visualize.ShowBoundingBoxes("N62.jpeg"))

In [None]:
# Export the imported VOC dataset as one COCO JSON file at the given path
output_path_coco = "/workspace/shared-data/license-plates/coco/License_Plate_Dataset.json"
dataset.export.ExportToCoco(output_path=output_path_coco)

In [None]:
# Path to the COCO JSON file for image N2. A dedicated variable is used so that
# path_to_annotations keeps pointing at the VOC annotations folder; otherwise
# re-running the ImportVOC cell after this one would be handed a JSON file path.
path_to_annotations_coco = "/workspace/shared-data/license-plates/coco/N2.json"

# Import the COCO annotations with pylabel and preview the first five rows
dataset_coco = importer.ImportCoco(path_to_annotations_coco, path_to_images=path_to_images, name="License_Plate_coco")
dataset_coco.df.head(5)

Now we will load the Coco Json that we extracted and display the same images as above

In [None]:
from IPython.display import display
# Render the bounding boxes for N2.jpeg from the COCO-imported dataset
display(dataset_coco.visualize.ShowBoundingBoxes("N2.jpeg"))

Now we will load the YOLO files and check that the images are the same. Interestingly, the indexing is different.

In [None]:
# Convert the Pascal VOC annotation for N5.jpeg to YOLO format; the YOLO files are output in a folder named yolo
pipeline.convert_pascal_to_yolo_format("N5.jpeg")

In [None]:
# Specify the folder containing the YOLO label files
path_to_annotations_yolo = "/workspace/shared-data/license-plates/yolo/labels"
# Path that leads from the YOLO labels folder to the images
path_to_images_yolo = "../../images/"

# Import the YOLO annotations with pylabel and preview the first five rows
dataset_yolo = importer.ImportYoloV5(path_to_annotations_yolo , path_to_images=path_to_images_yolo, name="License_Plate_yolo")
dataset_yolo.df.head(5)

In [None]:
from IPython.display import display
# Render the bounding boxes for N5.jpeg from the YOLO-imported dataset
display(dataset_yolo.visualize.ShowBoundingBoxes("N5.jpeg"))

Now that we have tested the above, we will run through a script to generate all of these

In [None]:
import os

# Directory holding the source images
directory = '/workspace/shared-data/license-plates/images'

# Convert the annotations of every regular file in the directory.
# os.listdir returns entries in arbitrary order, so sort them to make the
# processing order (and any printed output) reproducible between runs.
for filename in sorted(os.listdir(directory)):
    f = os.path.join(directory, filename)

    # Skip sub-directories and anything else that is not a regular file
    if os.path.isfile(f):
        pipeline.convert_pascal_to_coco_format(filename)
        pipeline.convert_pascal_to_yolo_format(filename)

We will now leverage methods to crop the vehicle image to get the license plate and then use it to detect the license plate number

In [None]:
# Ask the pipeline for the licence-plate crop of N5.jpeg
cropped_img = pipeline.get_licence_plate_image("N5.jpeg")

In [None]:
# Run plate-number detection on the cropped file — presumably the one written by get_licence_plate_image; TODO confirm
licence_plate_no =  pipeline.detect_licence_plate("N5_cropped.jpeg")
print(licence_plate_no)

AREA BELOW IN THE NOTEBOOK IS FOR FURTHER EXPERIMENTATION

In [None]:
# Crop the annotated region out of a single image using its COCO annotation file

img_name = "N190"
img_extension = ".jpeg"
img_file_name = img_name + img_extension
path_to_annotations = "/workspace/shared-data/license-plates/coco/" + img_name + ".json"
path_to_images = "../images/"
dataset_coco = importer.ImportCoco(path_to_annotations, path_to_images=path_to_images, name="License_Plate_coco")

# Select the annotation row(s) for this image and fail early with a clear message if there are none
img_row = dataset_coco.df.loc[dataset_coco.df['img_filename'] == img_file_name]
if img_row.empty:
    raise ValueError("No annotation found for " + img_file_name + " in " + path_to_annotations)

# Use the first matching annotation. Calling int() on a whole Series is deprecated in
# pandas and raises when more than one row matches, so pull out the scalars explicitly.
first_annotation = img_row.iloc[0]
x_min = int(first_annotation['ann_bbox_xmin'])
y_min = int(first_annotation['ann_bbox_ymin'])
x_max = int(first_annotation['ann_bbox_xmax'])
y_max = int(first_annotation['ann_bbox_ymax'])
print([x_min, y_min, x_max, y_max])

# We will put a margin for error handling
# NOTE(review): margin is defined but not applied to the crop below — confirm whether it should be
margin = 60 

# Path to image
img_folder = "/workspace/shared-data/license-plates/images/"
img = cv.imread(img_folder + img_file_name)
# cv.imread returns None instead of raising when the file is missing or unreadable
if img is None:
    raise FileNotFoundError("Could not read image " + img_folder + img_file_name)

# Write cropped image (rows y_min..y_max, columns x_min..x_max) to the current working directory
cropped_image = img[y_min:y_max, x_min:x_max]
cropped_img_file_name = img_name + "_cropped" +  img_extension
cv.imwrite(cropped_img_file_name, cropped_image)

In [None]:
# Display the full source image (BGR converted to RGB for matplotlib)
plt.imshow(cv.cvtColor(img, cv.COLOR_BGR2RGB))  

In [None]:
# Display the cropped region (BGR converted to RGB for matplotlib)
plt.imshow(cv.cvtColor(cropped_image, cv.COLOR_BGR2RGB))  

In [None]:
import pytesseract
import re

# Read the cropped image from the same relative path the crop cell wrote it to with
# cv.imwrite, so this cell does not depend on a machine-specific absolute directory.
img_to_read = cropped_img_file_name
img_cv = cv.imread(img_to_read)
# cv.imread returns None instead of raising when the file is missing or unreadable
if img_cv is None:
    raise FileNotFoundError("Could not read image " + img_to_read)

# Upscale 2x with cubic interpolation to give the OCR engine larger characters to work with
img_resized = cv.resize(img_cv, None, fx = 2, fy = 2,  interpolation = cv.INTER_CUBIC)

# Convert the upscaled image to grayscale, so the resize above actually feeds the OCR step
img_gray = cv.cvtColor(img_resized, cv.COLOR_BGR2GRAY)

# Predict using OCR (--oem 3: default engine mode, --psm 8: treat the image as a single word)
prediction = pytesseract.image_to_string(img_gray, lang ='eng', config ='--oem 3 --psm 8 ')

# Start from the raw prediction, then prefer the first whitespace-separated token
# longer than 4 characters; fall back to the raw prediction when no token qualifies.
license_plate_no = prediction
print(license_plate_no)
filter_predicted_result = prediction.split()
license_plate_no = next(
    (token for token in filter_predicted_result if len(token) > 4),
    license_plate_no,
)

print("second = " + license_plate_no)
def letter_or_digit(s):
    """Return the index of the first letter (a-z, case-insensitive) or digit in ``s``.

    Returns -1 when ``s`` contains no such character.
    """
    first_match = re.compile(r'[a-z0-9]', re.I).search(s)
    return -1 if first_match is None else first_match.start()

# Drop everything before the first letter/digit of the raw prediction.
# letter_or_digit returns -1 when nothing matches; slicing with -1 would keep the
# last character, so use an empty string in that case.
start = letter_or_digit(prediction)
license_plate_no = prediction[start:] if start >= 0 else ""
print(prediction)
print(license_plate_no)
# Keep only upper-case letters, digits and hyphens (this also strips whitespace and newlines)
license_plate_no = re.sub(r'[^A-Z0-9-]+', '', license_plate_no)
print(license_plate_no)