In [1]:
from ultralytics import YOLO
import re
import cv2
import easyocr
import numpy as np
import os

# Load the YOLO detector from the weights file saved by the training run.
WEIGHTS_PATH = '../runs/train/weights/best.pt'
model = YOLO(WEIGHTS_PATH)

# EasyOCR reader restricted to English; recognize() uses it to read cropped plates.
OCR_LANGUAGES = ['en']
ocr_reader = easyocr.Reader(OCR_LANGUAGES)

Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.
Downloading detection model, please wait. This may take several minutes depending upon your network connection.


Progress: |██████████████████████████████████████████████████| 100.0% Complete

Downloading recognition model, please wait. This may take several minutes depending upon your network connection.


Progress: |██████████████████████████████████████████████████| 100.0% Complete

In [3]:
# Run the model's validation pass and report box precision / recall.
results = model.val()
# The f-strings are double-quoted so the single-quoted dictionary keys also parse
# on Python < 3.12; reusing the enclosing quote type inside an f-string is only
# accepted from Python 3.12 onwards.
print(f"Precision: {results.results_dict['metrics/precision(B)']}")
print(f"Recall: {results.results_dict['metrics/recall(B)']}")

Ultralytics 8.3.47  Python-3.12.6 torch-2.5.1+cpu CPU (12th Gen Intel Core(TM) i5-12500H)
Model summary (fused): 168 layers, 3,006,038 parameters, 0 gradients, 8.1 GFLOPs


[34m[1mval: [0mScanning D:\VSCodeProjects\LicensePlateRecognition\dataset\labels\val.cache... 1145 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1145/1145 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 72/72 [02:42<00:00,  2.26s/it]


                   all       1145       1313       0.98      0.972      0.992      0.837
          LicensePlate        409        410      0.976      0.983      0.993      0.807
          LicensePlate        753        903      0.983       0.96      0.991      0.868
Speed: 2.4ms preprocess, 112.2ms inference, 0.0ms loss, 0.8ms postprocess per image
Results saved to [1mD:\runs\detect\val[0m
Precision: 0.9796212948511402
Recall: 0.971514603136906


In [2]:
# Locate the bounding boxes
def detect(image):
    """Run the YOLO model on ``image`` and collect the boxes it reports.

    Args:
        image: Input passed straight to ``model(...)``.

    Returns:
        A list with one entry per box in the first result; each entry is the
        box's ``xyxy`` coordinates converted to a plain Python list
        (unpacked by callers as ``x_min, y_min, x_max, y_max``).
    """
    first_result = model(image)[0]
    # One coordinate list per detected box, in the order the model reports them.
    return [box.xyxy[0].tolist() for box in first_result.boxes]


# Normalize the recognized text
def normalize(ocr_result):
    """Clean an OCR string and repair a digit misread in the third position.

    Every character other than ASCII letters and digits is removed. If at
    least three characters remain and the third one is a digit listed in the
    substitution table, it is replaced by the look-alike letter (presumably
    because the third plate character is expected to be a letter -- confirm
    against the plate format).

    Args:
        ocr_result: Raw text produced by the OCR step.

    Returns:
        The cleaned (and possibly corrected) string.
    """
    # Keep only letters (a-z, A-Z) and digits (0-9).
    alnum_only = re.sub(r'[^a-zA-Z0-9]', '', ocr_result)

    # Nothing to correct when there is no third character.
    if len(alnum_only) < 3:
        return alnum_only

    # Digits mapped to the letters they are swapped for in the third position.
    digit_to_letter = {'8': 'B', '2': 'Z', '7': 'Z', '4': 'A', '5': 'S', '0': 'O'}
    head, third_char, tail = alnum_only[:2], alnum_only[2], alnum_only[3:]
    return head + digit_to_letter.get(third_char, third_char) + tail

# Detect plates and return their text
def recognize(image):
    """Detect license plates in ``image`` and OCR each one.

    Each detected box is cropped, converted to grayscale, blurred and
    contrast-enhanced with top-hat / black-hat morphology before being handed
    to the OCR reader. The OCR fragments are concatenated and passed through
    ``normalize``.

    Args:
        image: Image array indexed as ``image[y, x]``; it is converted with
            ``cv2.COLOR_BGR2GRAY``, so a 3-channel BGR image is expected.

    Returns:
        A list of normalized plate strings, one per box returned by
        ``detect`` and in the same order. A box whose crop is empty
        contributes ``''`` so the list stays index-aligned with the boxes.
    """
    bounding_boxes = detect(image)
    # The structuring element is the same for every plate, so build it once.
    kernel = np.ones((5, 5), np.uint8)

    result_texts = []
    for x_min, y_min, x_max, y_max in bounding_boxes:
        # Crop the plate region out of the image.
        cropped_plate = image[int(y_min):int(y_max), int(x_min):int(x_max)]

        # A box that collapses to zero width/height after int() truncation gives
        # an empty crop, which cv2.cvtColor rejects. Keep a placeholder instead
        # of raising so callers can still zip the texts with the boxes.
        if cropped_plate.size == 0:
            result_texts.append('')
            continue

        # Convert to grayscale.
        gray_plate = cv2.cvtColor(cropped_plate, cv2.COLOR_BGR2GRAY)

        # Enhance the image: blur, then add the top-hat and subtract the black-hat.
        blurred_plate = cv2.GaussianBlur(gray_plate, (5, 5), 0)
        top_hat = cv2.morphologyEx(blurred_plate, cv2.MORPH_TOPHAT, kernel)
        black_hat = cv2.morphologyEx(blurred_plate, cv2.MORPH_BLACKHAT, kernel)
        enhanced_plate = cv2.add(blurred_plate, top_hat)
        enhanced_plate = cv2.subtract(enhanced_plate, black_hat)

        # Read the characters on the plate with OCR (the array is passed as-is).
        ocr_results = ocr_reader.readtext(enhanced_plate)

        # Order the fragments by the y coordinate of their first box corner and
        # join their text (second-to-last field of each OCR result).
        ordered_results = sorted(ocr_results, key=lambda item: item[0][0][1])
        plate_text = ''.join(item[-2] for item in ordered_results)

        # Normalize and store the result.
        result_texts.append(normalize(plate_text))

    return result_texts

In [None]:
# Crop the image using the YOLO model
def crop_image_using_yolo(original_image, margin=50, thumb_size=150):
    """Crop every detected plate and tile the crops into one square grid image.

    Args:
        original_image: Image array indexed as ``image[y, x]`` with 3 channels.
        margin: Gap in pixels left between neighbouring thumbnails.
        thumb_size: Side length in pixels of each (square) thumbnail.
            Defaults to 150, the value that used to be hard-coded.

    Returns:
        ``None`` when ``detect`` finds no boxes; otherwise a ``uint8`` array
        of shape ``(side, side, 3)`` with a black background, where
        ``side = grid_size * (thumb_size + margin) - margin``. Empty crops
        leave their grid cell blank.
    """
    bounding_boxes = detect(original_image)
    # Regions cut out of the image, one per detected box.
    cropped_images = [
        original_image[int(y_min):int(y_max), int(x_min):int(x_max)]
        for x_min, y_min, x_max, y_max in bounding_boxes
    ]
    if not cropped_images:
        return None

    # Build the display grid.
    # Number of rows/columns: smallest square that holds every crop.
    grid_size = int(np.ceil(np.sqrt(len(cropped_images))))
    cell_size = thumb_size + margin
    # No margin is needed after the last row/column.
    grid_height = grid_size * cell_size - margin
    grid_width = grid_size * cell_size - margin
    # Black canvas (all zeros); thumbnails are pasted on top of it.
    crop_image = np.zeros((grid_height, grid_width, 3), dtype=np.uint8)

    for idx, cropped_image in enumerate(cropped_images):
        if cropped_image.size > 0:
            # Resize the crop to the thumbnail size.
            thumbnail = cv2.resize(cropped_image, (thumb_size, thumb_size))
            row = idx // grid_size
            col = idx % grid_size
            # Top-left corner of this thumbnail in the grid, margin included.
            start_y = row * cell_size
            start_x = col * cell_size
            crop_image[start_y:start_y + thumb_size, start_x:start_x + thumb_size, :] = thumbnail
    return crop_image

def show_cropped_images(dir_path):
    """Show each image in ``dir_path`` next to the grid of plates cropped from it.

    Only files ending in ``.png``, ``.jpg`` or ``.jpeg`` (case-insensitive)
    are considered. For every image two OpenCV windows are opened (original
    and cropped grid) and the function blocks on a key press before moving to
    the next file.

    Args:
        dir_path: Directory that contains the images.
    """
    for filename in os.listdir(dir_path):
        if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue

        file_path = os.path.join(dir_path, filename)
        original_image = cv2.imread(file_path)
        # cv2.imread reports failure by returning None rather than raising.
        if original_image is None:
            print(f"Skipping unreadable image: {file_path}")
            continue

        cropped_image = crop_image_using_yolo(original_image)

        # Show the result.
        before_img_name = f"{filename} - Before cropped"
        after_img_name = f"{filename} - After cropped"

        cv2.imshow(before_img_name, original_image)
        cv2.moveWindow(before_img_name, 100, 100)

        # crop_image_using_yolo returns None when nothing was detected, and
        # cv2.imshow cannot display None -- show only the original in that case.
        if cropped_image is None:
            print(f"No license plate detected in {filename}")
        else:
            cv2.imshow(after_img_name, cropped_image)
            cv2.moveWindow(after_img_name, 120 + original_image.shape[1], 100)

        cv2.waitKey(0)
        cv2.destroyAllWindows()

show_cropped_images('../data/cropped_data')


0: 416x640 2 LicensePlates, 122.3ms
Speed: 8.0ms preprocess, 122.3ms inference, 9.5ms postprocess per image at shape (1, 3, 416, 640)

0: 512x640 4 LicensePlates, 128.8ms
Speed: 5.0ms preprocess, 128.8ms inference, 6.0ms postprocess per image at shape (1, 3, 512, 640)

0: 416x640 1 LicensePlate, 107.5ms
Speed: 3.0ms preprocess, 107.5ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 480x640 1 LicensePlate, 121.9ms
Speed: 4.0ms preprocess, 121.9ms inference, 1.0ms postprocess per image at shape (1, 3, 480, 640)


In [None]:
def process_image(image):
    """Annotate ``image`` in place with each detected plate's box and text.

    ``recognize`` supplies the plate strings and ``detect`` the boxes; the two
    lists are paired by position. Note that ``recognize`` already calls
    ``detect`` internally, so the detector runs twice per image.

    Args:
        image: Image array to draw on (modified in place).

    Returns:
        The same ``image`` object, with green rectangles and labels drawn.
    """
    texts = recognize(image)
    bounding_boxes = detect(image)

    green = (0, 255, 0)
    for text, box in zip(texts, bounding_boxes):
        left, top, right, bottom = (int(coord) for coord in box)
        cv2.rectangle(image, (left, top), (right, bottom), green, 2)
        # The label sits 10 px above the top-left corner of the box.
        cv2.putText(image, text, (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, green, 2)
    return image

# Annotate and display every image in the folder, one at a time (any key advances).
path_dir = '../data/main_data/images'
for filename in os.listdir(path_dir):
    img = cv2.imread(os.path.join(path_dir, filename))
    # cv2.imread returns None for files it cannot decode (non-images, corrupt
    # files); skip those instead of crashing inside process_image.
    if img is None:
        print(f"Skipping unreadable image: {filename}")
        continue
    processed_image = process_image(img)
    cv2.imshow('Processed Image', processed_image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


0: 640x416 1 LicensePlate, 132.1ms
Speed: 7.0ms preprocess, 132.1ms inference, 9.0ms postprocess per image at shape (1, 3, 640, 416)

0: 640x416 1 LicensePlate, 66.3ms
Speed: 2.0ms preprocess, 66.3ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 416)


In [None]:
def process_image(image):
    """Annotate ``image`` in place with each detected plate's box and text.

    ``recognize`` supplies the plate strings and ``detect`` the boxes; the two
    lists are paired by position. Note that ``recognize`` already calls
    ``detect`` internally, so the detector runs twice per image.

    Args:
        image: Image array to draw on (modified in place).

    Returns:
        The same ``image`` object, with green rectangles and labels drawn.
    """
    texts = recognize(image)
    bounding_boxes = detect(image)

    green = (0, 255, 0)
    for text, box in zip(texts, bounding_boxes):
        left, top, right, bottom = (int(coord) for coord in box)
        cv2.rectangle(image, (left, top), (right, bottom), green, 2)
        # The label sits 10 px above the top-left corner of the box.
        cv2.putText(image, text, (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, green, 2)
    return image

def process_camera():
    """Annotate the live feed of camera 0 until it ends or ``q`` is pressed.

    Every frame is passed through ``process_image`` and shown in an OpenCV
    window. Prints a message and returns immediately when the camera cannot
    be opened.
    """
    cap = cv2.VideoCapture(0)

    if not cap.isOpened():
        print("Không thể mở camera")
        return

    # try/finally guarantees the camera handle and the windows are released even
    # when processing raises or the kernel is interrupted mid-loop; otherwise
    # the device stays locked until the kernel is restarted.
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            processed_frame = process_image(frame)
            cv2.imshow('Processed Camera Feed', processed_frame)

            # Low byte of the key code; 'q' stops the loop.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()


process_camera()

In [None]:
def process_image(image):
    """Annotate ``image`` in place with each detected plate's box and text.

    ``recognize`` supplies the plate strings and ``detect`` the boxes; the two
    lists are paired by position. Note that ``recognize`` already calls
    ``detect`` internally, so the detector runs twice per image.

    Args:
        image: Image array to draw on (modified in place).

    Returns:
        The same ``image`` object, with green rectangles and labels drawn.
    """
    texts = recognize(image)
    bounding_boxes = detect(image)

    green = (0, 255, 0)
    for text, box in zip(texts, bounding_boxes):
        left, top, right, bottom = (int(coord) for coord in box)
        cv2.rectangle(image, (left, top), (right, bottom), green, 2)
        # The label sits 10 px above the top-left corner of the box.
        cv2.putText(image, text, (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, green, 2)
    return image

def display_video(input_video_path, output_video_path):
    """Annotate a video frame by frame, display it and write it to a new file.

    Each frame goes through ``process_image``, is written to
    ``output_video_path`` (``mp4v`` codec, same frame size as the input) and
    shown in an OpenCV window. Pressing ``q`` stops early.

    Args:
        input_video_path: Path of the video to read.
        output_video_path: Path of the annotated video to write.
    """
    cap = cv2.VideoCapture(input_video_path)
    if not cap.isOpened():
        print(f"Cannot open input video: {input_video_path}")
        return

    out = None
    # try/finally so the reader, the writer and the windows are always released;
    # an unreleased writer can leave the output file unfinalized.
    try:
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Keep the fractional rate (e.g. 29.97): truncating it to an int makes
        # the output drift from the source timing. Fall back to a nominal rate
        # when the container reports none, since a rate of 0 is unusable.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))
        if not out.isOpened():
            # Keep displaying frames, but make the silent write failure visible.
            print(f"Warning: cannot write output video: {output_video_path}")

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            processed_frame = process_image(frame)
            out.write(processed_frame)
            cv2.imshow('Processed Video', processed_frame)

            # Low byte of the key code; 'q' stops the loop.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        cap.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()

display_video('../data/video_data/inputs/input2.mp4', '../data/video_data/outputs/output2.mp4')


0: 384x640 1 LicensePlate, 199.1ms
Speed: 6.2ms preprocess, 199.1ms inference, 9.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 LicensePlate, 100.2ms
Speed: 2.0ms preprocess, 100.2ms inference, 1.1ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 LicensePlate, 153.6ms
Speed: 4.0ms preprocess, 153.6ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 LicensePlate, 97.9ms
Speed: 2.0ms preprocess, 97.9ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 LicensePlate, 110.3ms
Speed: 3.0ms preprocess, 110.3ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 LicensePlate, 110.0ms
Speed: 1.9ms preprocess, 110.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 LicensePlate, 130.1ms
Speed: 4.0ms preprocess, 130.1ms inference, 2.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 LicensePlate, 118.0ms
Speed: 2.0ms preprocess, 118

In [5]:
# Ground truth: the first line of each label file, keyed by the file name without extension.
exact_texts = dict()

for filename in os.listdir('../data/main_data/labels'):
    key = os.path.splitext(filename)[0]
    with open(os.path.join('../data/main_data/labels', filename)) as file:
        value = file.readline().strip()
    exact_texts[key] = value

# Predictions: text of the first plate recognized in each image, keyed the same way.
predict_texts = dict()

for filename in os.listdir('../data/main_data/images'):
    key = os.path.splitext(filename)[0]
    image = cv2.imread(os.path.join('../data/main_data/images', filename))
    # cv2.imread returns None for files it cannot decode; leave those out of
    # the evaluation instead of crashing inside recognize().
    if image is None:
        print(f"Skipping unreadable image: {filename}")
        continue
    plate_texts = recognize(image)
    # recognize() returns an empty list when no plate is detected; record an
    # empty prediction (counted as a miss) rather than raising IndexError.
    value = plate_texts[0] if plate_texts else ''
    predict_texts[key] = value


0: 640x416 1 LicensePlate, 83.4ms
Speed: 2.9ms preprocess, 83.4ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 416)

0: 640x416 1 LicensePlate, 61.0ms
Speed: 1.0ms preprocess, 61.0ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 416)

0: 416x640 1 LicensePlate, 90.7ms
Speed: 2.0ms preprocess, 90.7ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 1 LicensePlate, 86.5ms
Speed: 2.0ms preprocess, 86.5ms inference, 2.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 1 LicensePlate, 74.0ms
Speed: 3.0ms preprocess, 74.0ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 1 LicensePlate, 81.0ms
Speed: 2.0ms preprocess, 81.0ms inference, 0.9ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 1 LicensePlate, 78.1ms
Speed: 2.0ms preprocess, 78.1ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)

0: 416x640 1 LicensePlate, 73.0ms
Speed: 2.0ms preprocess, 73.0ms inference

In [6]:
# Fraction of images whose predicted text equals the label exactly.
exact_match_count = sum(
    predicted == exact_texts[key] for key, predicted in predict_texts.items()
)
absolute_accuracy = exact_match_count / len(predict_texts)

def is_one_character_different(str1, str2):
    """Tell whether two equal-length strings differ at exactly one position.

    Args:
        str1: First string.
        str2: Second string.

    Returns:
        ``True`` only when both strings have the same length and exactly one
        pair of aligned characters differs; ``False`` otherwise.
    """
    if len(str1) != len(str2):
        return False

    mismatches = 0
    for c1, c2 in zip(str1, str2):
        if c1 != c2:
            mismatches += 1
    return mismatches == 1

# Fraction of images whose prediction has the label's length and is off by exactly one character.
near_miss_count = sum(
    is_one_character_different(predicted, exact_texts[key])
    for key, predicted in predict_texts.items()
)
relative_accuracy = near_miss_count / len(predict_texts)

# Each heading is followed by its value on the next line.
print('Absolute accuracy:', absolute_accuracy, sep='\n')
print('Relative accuracy:', relative_accuracy, sep='\n')

Absolute accuracy:
0.575
Relative accuracy:
0.18333333333333332


In [None]:
def process_license_plate(input_image_path):
    """Extract plate-like regions from an image with classical image processing.

    Pipeline: read the file, resize it to 472x303, blur, threshold, clean the
    binary mask and crop every contour whose bounding box passes the
    size / aspect-ratio filter. Each accepted crop is also displayed with
    matplotlib.

    Args:
        input_image_path: Path of the image file to process.

    Returns:
        A list of crops taken from the resized colour image, one per accepted
        contour.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    import cv2
    import numpy as np
    from skimage.morphology import disk, opening
    import matplotlib.pyplot as plt

    def resize_image(image, width=472, height=303):
        # interpolation must be given by keyword: the third positional
        # parameter of cv2.resize is ``dst``, so a positional INTER_CUBIC was
        # never used as the interpolation mode.
        return cv2.resize(image, (width, height), interpolation=cv2.INTER_CUBIC)

    def apply_blur(image):
        # Grayscale, then a median blur followed by a Gaussian blur.
        gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        blurred_image = cv2.medianBlur(gray_image, 3)
        return cv2.GaussianBlur(blurred_image, (3, 3), 3)

    def threshold_image(image):
        # Local contrast enhancement.
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(15, 15))
        contrast_enhanced = clahe.apply(image)
        # Subtract the morphological opening (disk of radius 20) from the
        # image, which removes the slowly varying background.
        struct_element = disk(20)
        opened_image = opening(contrast_enhanced, struct_element)
        enhanced_image = cv2.subtract(contrast_enhanced, opened_image.astype(np.uint8))
        # Otsu picks the binarization threshold automatically.
        _, binary_image = cv2.threshold(enhanced_image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
        return cv2.convertScaleAbs(cv2.equalizeHist(binary_image), alpha=1.5, beta=0)

    def clean_image(image):
        kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
        eroded_image = cv2.erode(image, kernel, iterations=2)
        h, w = eroded_image.shape
        # floodFill requires a mask 2 px larger than the image in each dimension.
        mask = np.zeros((h + 2, w + 2), np.uint8)
        flood_filled = eroded_image.copy()
        # Fill from the top-left corner; pixels left unfilled are the ones not
        # connected to that corner.
        cv2.floodFill(flood_filled, mask, seedPoint=(0, 0), newVal=255, loDiff=(10,), upDiff=(10,))
        inverted_filled = cv2.bitwise_not(flood_filled)
        # OR-ing the inverted fill back in closes those enclosed regions.
        return cv2.bitwise_or(eroded_image, inverted_filled)

    def extract_license_plates(original_image, processed_image):
        contours, _ = cv2.findContours(processed_image, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        plates = []
        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)
            aspect_ratio = w / h
            area = w * h

            # Keep boxes whose shape and size fall inside the accepted ranges.
            if 0.5 < aspect_ratio < 2 and w > 50 and h > 20 and 1000 < area < 9500:
                plate = original_image[y:y + h, x:x + w]
                # OpenCV stores colour images as BGR while matplotlib expects
                # RGB; convert for display only so colours are not swapped.
                plt.imshow(cv2.cvtColor(plate, cv2.COLOR_BGR2RGB))
                plt.title("Detected License Plate")
                plt.show()
                plates.append(plate)
        return plates

    image = cv2.imread(input_image_path)
    # cv2.imread returns None instead of raising when the file is missing or
    # cannot be decoded; fail here with a clear message.
    if image is None:
        raise FileNotFoundError(f"Cannot read image: {input_image_path}")

    resized_image = resize_image(image)
    blurred_image = apply_blur(resized_image)
    thresholded_image = threshold_image(blurred_image)
    cleaned_image = clean_image(thresholded_image)
    license_plates = extract_license_plates(resized_image, cleaned_image)

    return license_plates