<center>
<h1>
Number Plate Recognition
</h1>
</center>

## Pytesseract Number Plate Recognition

In [1]:
# !pip install pytesseract
# !pip install Pillow

### Besides the above installation, one also needs to install tesseract-ocr, which can be done by following this guide:
https://stackoverflow.com/questions/46140485/tesseract-installation-in-windows

In [2]:
import cv2
import pytesseract
import matplotlib.pyplot as plt
import os
import json

# Location of the Tesseract executable used by pytesseract. The TESSERACT_CMD
# environment variable takes precedence, so the notebook can run on machines
# where Tesseract is not installed in the Windows location used as fallback.
pytesseract.pytesseract.tesseract_cmd = os.environ.get(
    'TESSERACT_CMD',
    'C:\\Program Files\\Tesseract-OCR\\tesseract.exe',
)

# Directory containing the cutout images
image_path = 'cutout_images'

# Ground-truth COCO file describing the number plate cutouts
number_plate_path = 'ground_truth/cutout_number_plate_coco.json'

# Parsing the number plate positions from the COCO JSON file
with open(number_plate_path, 'r') as file:
    number_plate_positions = json.load(file)

# Indexing the annotations by image id; if several annotations share an
# image id, the last one in the file is the one that is kept
annotation_lookup_dictionary = {
    annotation['image_id']: annotation
    for annotation in number_plate_positions['annotations']
}

# Extracting the text from the number plate
def extract_text_from_number_plate(number_plates, do_display=False):
    """Run Tesseract OCR on each number plate image and return the text.

    Args:
        number_plates: Iterable of images, each passed unchanged to
            ``pytesseract.image_to_string``.
        do_display: When True, show every image with matplotlib and print
            the text recognised for it.

    Returns:
        A list with one string per input image, in input order. Leading and
        trailing whitespace is removed, so an image on which nothing was
        recognised yields ''.
    """
    text = []
    # Iterating through the number plates
    for number_plate in number_plates:
        if do_display:
            # Plotting the number plate
            plt.imshow(number_plate)
            plt.axis('off')
            plt.show()

        # The raw OCR string can carry trailing whitespace (typically a
        # newline / form-feed page separator). Stripping it keeps the stored
        # label clean and makes "nothing recognised" exactly '', which is
        # what the empty-label statistics compare against.
        recognised_text = pytesseract.image_to_string(number_plate)
        text.append(recognised_text.strip())

        if do_display:
            print(f'Text: {text[-1]}')
            print()
    return text

# Save prediction path
prediction_path = 'number_plate_predictions/tesseract/predictions_3840x2160.json'

# The output reuses the metadata of the ground-truth COCO file; only the
# annotations are rebuilt below, each extended with the recognised label
coco_output = {
    'info': number_plate_positions['info'],
    'licenses': number_plate_positions['licenses'],
    'images': number_plate_positions['images'],
    'annotations': [],
    'categories': number_plate_positions['categories'],
}

# Iterating through the number plate images in the dataset
for info in number_plate_positions['images']:
    image_name = info['file_name']
    print(f'Processing image: {image_name}')

    # The per-image path gets its own variable so that the image directory
    # stored in image_path is not overwritten on every iteration
    image_file_path = os.path.join(image_path, image_name)

    # cv2.imread returns None instead of raising when the file is missing or
    # cannot be decoded; fail here with the offending path rather than with
    # an obscure error inside cvtColor
    image = cv2.imread(image_file_path)
    if image is None:
        raise FileNotFoundError(f'Could not read image: {image_file_path}')
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Extracting the text from the number plate (one image in, one text out)
    text = extract_text_from_number_plate([image])

    # Looking up the ground-truth annotation that belongs to this image
    image_id = info['id']
    annotation = annotation_lookup_dictionary[image_id]

    # Setting the label to the extracted text
    annotation['label'] = text[0]
    coco_output['annotations'].append(annotation)

# Saving the coco output
# Creating the directory if it does not exist
os.makedirs(os.path.dirname(prediction_path), exist_ok=True)
with open(prediction_path, 'w') as file:
    json.dump(coco_output, file, indent=4)


Processing image: video1_frame_130_cutout_82.jpg
Processing image: video1_frame_130_cutout_83.jpg
Processing image: video1_frame_130_cutout_84.jpg
Processing image: video1_frame_130_cutout_99.jpg
Processing image: video1_frame_537_cutout_314.jpg
Processing image: video1_frame_537_cutout_315.jpg
Processing image: video1_frame_537_cutout_316.jpg
Processing image: video1_frame_537_cutout_317.jpg
Processing image: video1_frame_86_cutout_394.jpg
Processing image: video1_frame_86_cutout_395.jpg
Processing image: video1_frame_86_cutout_396.jpg
Processing image: video1_frame_539_cutout_443.jpg
Processing image: video1_frame_539_cutout_444.jpg
Processing image: video1_frame_539_cutout_445.jpg
Processing image: video1_frame_539_cutout_446.jpg
Processing image: video1_frame_546_cutout_706.jpg
Processing image: video1_frame_546_cutout_707.jpg
Processing image: video1_frame_546_cutout_708.jpg
Processing image: video1_frame_546_cutout_709.jpg
Processing image: video1_frame_734_cutout_871.jpg
Process

In [3]:
# Checking what percentage of the predicted labels is empty / non-empty
prediction_path = 'number_plate_predictions/tesseract/predictions_3840x2160.json'

# Loading the coco output
with open(prediction_path, 'r') as file:
    coco_output = json.load(file)

annotations = coco_output['annotations']
if not annotations:
    # Without annotations the percentages are undefined; report that
    # explicitly instead of failing with a ZeroDivisionError below
    raise ValueError(f'No annotations found in {prediction_path}')

# Counting the number of empty labels. A label that consists only of
# whitespace (e.g. a lone newline or form feed) carries no recognised text
# and is therefore counted as empty as well.
empty_labels = sum(
    1 for annotation in annotations if annotation['label'].strip() == ''
)

# Calculating the percentage of empty labels
percentage_empty_labels = empty_labels / len(annotations) * 100
print(f'Percentage of empty labels: {percentage_empty_labels:.2f}%')

# Checking the percentage of labels that are not empty
percentage_non_empty_labels = 100 - percentage_empty_labels
print(f'Percentage of non-empty labels: {percentage_non_empty_labels:.2f}%')

Percentage of empty labels: 75.09%
Percentage of non-empty labels: 24.91%


## Easy OCR Number Plate Recognition

In [4]:
# !pip install easyocr

In [5]:
import cv2
import matplotlib.pyplot as plt
import os
import json
import easyocr

# EasyOCR reader set up for English text
reader = easyocr.Reader(['en'])

# Directory containing the cutout images
image_path = 'cutout_images'

# Ground-truth COCO file describing the number plate cutouts
number_plate_path = 'ground_truth/cutout_number_plate_coco.json'

# Parsing the number plate positions from the COCO JSON file
with open(number_plate_path, 'r') as file:
    number_plate_positions = json.load(file)

# Indexing the annotations by image id; if several annotations share an
# image id, the last one in the file is the one that is kept
annotation_lookup_dictionary = {
    annotation['image_id']: annotation
    for annotation in number_plate_positions['annotations']
}

# Extracting the text from the number plate
def extract_text_from_number_plate(number_plates, do_display=False):
    """Read each number plate image with the module-level EasyOCR ``reader``.

    Args:
        number_plates: Iterable of images, each passed unchanged to
            ``reader.readtext``.
        do_display: When True, show every image with matplotlib and print
            the text chosen for it.

    Returns:
        A list with one string per input image, in input order. Only the
        first detection reported by the reader is used; an image without any
        detection yields ''.
    """
    recognised = []
    for plate_image in number_plates:
        if do_display:
            # Showing the plate that is about to be read
            plt.imshow(plate_image)
            plt.axis('off')
            plt.show()

        detections = reader.readtext(plate_image)

        if detections is None or len(detections) == 0:
            # Nothing was detected on this plate
            label = ''
        else:
            # The second-to-last field of the first detection is taken as
            # the text (presumably EasyOCR's (bbox, text, confidence)
            # layout - confirm against the EasyOCR documentation)
            label = str(detections[0][-2])
        recognised.append(label)

        if do_display:
            print(f'Text: {recognised[-1]}')
            print()
    return recognised

# Save prediction path
prediction_path = 'number_plate_predictions/easyocr/predictions_3840x2160.json'

# The output reuses the metadata of the ground-truth COCO file; only the
# annotations are rebuilt below, each extended with the recognised label
coco_output = {
    'info': number_plate_positions['info'],
    'licenses': number_plate_positions['licenses'],
    'images': number_plate_positions['images'],
    'annotations': [],
    'categories': number_plate_positions['categories'],
}

# Iterating through the number plate images in the dataset
for info in number_plate_positions['images']:
    image_name = info['file_name']
    print(f'Processing image: {image_name}')

    # The per-image path gets its own variable so that the image directory
    # stored in image_path is not overwritten on every iteration
    image_file_path = os.path.join(image_path, image_name)

    # cv2.imread returns None instead of raising when the file is missing or
    # cannot be decoded; fail here with the offending path rather than with
    # an obscure error inside cvtColor
    image = cv2.imread(image_file_path)
    if image is None:
        raise FileNotFoundError(f'Could not read image: {image_file_path}')
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Extracting the text from the number plate (one image in, one text out)
    text = extract_text_from_number_plate([image])

    # Looking up the ground-truth annotation that belongs to this image
    image_id = info['id']
    annotation = annotation_lookup_dictionary[image_id]

    # Setting the label to the extracted text
    annotation['label'] = text[0]
    coco_output['annotations'].append(annotation)

# Saving the coco output
# Creating the directory if it does not exist
os.makedirs(os.path.dirname(prediction_path), exist_ok=True)
with open(prediction_path, 'w') as file:
    json.dump(coco_output, file, indent=4)


Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


Processing image: video1_frame_130_cutout_82.jpg
Processing image: video1_frame_130_cutout_83.jpg
Processing image: video1_frame_130_cutout_84.jpg
Processing image: video1_frame_130_cutout_99.jpg
Processing image: video1_frame_537_cutout_314.jpg
Processing image: video1_frame_537_cutout_315.jpg
Processing image: video1_frame_537_cutout_316.jpg
Processing image: video1_frame_537_cutout_317.jpg
Processing image: video1_frame_86_cutout_394.jpg
Processing image: video1_frame_86_cutout_395.jpg
Processing image: video1_frame_86_cutout_396.jpg
Processing image: video1_frame_539_cutout_443.jpg
Processing image: video1_frame_539_cutout_444.jpg
Processing image: video1_frame_539_cutout_445.jpg
Processing image: video1_frame_539_cutout_446.jpg
Processing image: video1_frame_546_cutout_706.jpg
Processing image: video1_frame_546_cutout_707.jpg
Processing image: video1_frame_546_cutout_708.jpg
Processing image: video1_frame_546_cutout_709.jpg
Processing image: video1_frame_734_cutout_871.jpg
Process

In [6]:
# Checking what percentage of the predicted labels is empty / non-empty
prediction_path = 'number_plate_predictions/easyocr/predictions_3840x2160.json'

# Loading the coco output
with open(prediction_path, 'r') as file:
    coco_output = json.load(file)

annotations = coco_output['annotations']
if not annotations:
    # Without annotations the percentages are undefined; report that
    # explicitly instead of failing with a ZeroDivisionError below
    raise ValueError(f'No annotations found in {prediction_path}')

# Counting the number of empty labels. A label that consists only of
# whitespace carries no recognised text and is therefore counted as empty
# as well, so the figure uses the same definition of "empty" for every
# OCR engine evaluated in this notebook.
empty_labels = sum(
    1 for annotation in annotations if annotation['label'].strip() == ''
)

# Calculating the percentage of empty labels
percentage_empty_labels = empty_labels / len(annotations) * 100
print(f'Percentage of empty labels: {percentage_empty_labels:.2f}%')

# Checking the percentage of labels that are not empty
percentage_non_empty_labels = 100 - percentage_empty_labels
print(f'Percentage of non-empty labels: {percentage_non_empty_labels:.2f}%')

Percentage of empty labels: 50.43%
Percentage of non-empty labels: 49.57%
