In [1]:
import pytesseract
import easyocr
from paddleocr import PaddleOCR
import matplotlib.pyplot as plt
import numpy as np
from pipeline import Pipeline
from huggingface_hub import hf_hub_download
from tqdm import tqdm
import logging

logging.getLogger("ppocr").setLevel(logging.ERROR)

model_path = hf_hub_download(repo_id="Pikurrot/yolo11n-licenseplates", filename="yolo11n_licenseplates.pt")
pipeline = Pipeline(model_path)

Using device cuda


In [2]:
detections = pipeline.detect("DatasetTotal/Frontal", 640, 0.25, -1)
crops = pipeline.extract_boxes(detections)


image 1/69 /media/eric/D/repos/ANPR-GIA/DatasetTotal/Frontal/0129KMK.jpg: 288x640 1 license_plate, 193.2ms
image 2/69 /media/eric/D/repos/ANPR-GIA/DatasetTotal/Frontal/0195JHM.jpg: 576x640 1 license_plate, 107.1ms
image 3/69 /media/eric/D/repos/ANPR-GIA/DatasetTotal/Frontal/0225JWL.jpg: 640x480 1 license_plate, 133.6ms
image 4/69 /media/eric/D/repos/ANPR-GIA/DatasetTotal/Frontal/0226MPP.jpg: 640x480 1 license_plate, 18.6ms
image 5/69 /media/eric/D/repos/ANPR-GIA/DatasetTotal/Frontal/0278KXW.jpg: 544x640 1 license_plate, 122.0ms
image 6/69 /media/eric/D/repos/ANPR-GIA/DatasetTotal/Frontal/0304FWK.jpg: 448x640 1 license_plate, 86.3ms
image 7/69 /media/eric/D/repos/ANPR-GIA/DatasetTotal/Frontal/0425KDK_.jpg: 640x480 (no detections), 19.1ms
image 8/69 /media/eric/D/repos/ANPR-GIA/DatasetTotal/Frontal/0492MTT.jpg: 640x640 1 license_plate, 26.8ms
image 9/69 /media/eric/D/repos/ANPR-GIA/DatasetTotal/Frontal/0550HNN.jpg: 640x640 2 license_plates, 29.8ms
image 10/69 /media/eric/D/repos/ANPR-GI

In [3]:
pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'
easyocr_reader = easyocr.Reader(['en'])
paddleocr_reader = PaddleOCR(use_angle_cls=True, lang='en')

In [8]:
import time

gt = [
	plate["path"].split("/")[-1].split(".")[0]
	for img in crops
	for plate in img
]
pred_tesseract = []
pred_easyocr = []
pred_paddleocr = []
time_tesseract = []
time_easyocr = []
time_paddleocr = []

for img in tqdm(crops):
	for plate in img:
		path, plate = plate.values()
		start_time = time.time()
		res_tesseract = pytesseract.image_to_data(
			plate,
			config='--psm 10 --oem 3 -c tessedit_char_whitelist=0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ',
			output_type=pytesseract.Output.DICT
		)
		res_tesseract = res_tesseract["text"][-1].replace(" ", "")
		time_tesseract.append(time.time() - start_time)
		start_time = time.time()
		res_easyocr = easyocr_reader.readtext(
			np.array(plate),
			allowlist="0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ",
		)
		res_easyocr = "".join([recog[-2] for recog in res_easyocr]).replace(" ", "")
		time_easyocr.append(time.time() - start_time)
		start_time
		res_paddleocr = paddleocr_reader.ocr(np.array(plate), det=False, rec=True, cls=False)
		print(res_paddleocr)
		res_paddleocr = res_paddleocr[0][0][0].replace(" ", "")
		time_paddleocr.append(time.time() - start_time)
		pred_tesseract.append(res_tesseract)
		pred_easyocr.append(res_easyocr)
		pred_paddleocr.append(res_paddleocr)
		break
	break

  0%|          | 0/69 [00:00<?, ?it/s]

[[('0129 KMK', 0.9188997745513916)]]





In [34]:
def accuracy(gt, pred):
	return sum([1 for i in range(len(gt)) if gt[i] == pred[i]]) / len(gt)

from Levenshtein import distance

def compute_similarity_percentage(predictions, gt):
    similarities = []
    for pred, true in zip(predictions, gt):
        d = distance(pred, true)
        max_len = max(len(pred), len(true))
        if max_len > 0:
            similarity = (1 - d / max_len) * 100  # Convert to percentage
        else:
            similarity = 100.0  # Both strings are empty
        similarities.append(similarity)
    
    average_similarity = sum(similarities) / len(similarities) if similarities else 0
    return similarities, average_similarity

print("Tesseract accuracy:", accuracy(gt, pred_tesseract), "Time:", np.mean(time_tesseract))
print("EasyOCR accuracy:", accuracy(gt, pred_easyocr), "Time:", np.mean(time_easyocr))
print("PaddleOCR accuracy:", accuracy(gt, pred_paddleocr), "Time:", np.mean(time_paddleocr))
print("Tesseract similarity:", compute_similarity_percentage(pred_tesseract, gt)[1])
print("EasyOCR similarity:", compute_similarity_percentage(pred_easyocr, gt)[1])
print("PaddleOCR similarity:", compute_similarity_percentage(pred_paddleocr, gt)[1])

Tesseract accuracy: 0.2222222222222222 Time: 0.4380912846989102
EasyOCR accuracy: 0.3194444444444444 Time: 0.15616274873415628
PaddleOCR accuracy: 0.7916666666666666 Time: 0.5329718987147013
Tesseract similarity: 71.2367724867725
EasyOCR similarity: 78.90312440408297
PaddleOCR similarity: 90.39627425044092
