In [3]:
import cv2
import math 

from ultralytics import YOLO
import pytesseract

import os
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

import imutils
import re
import numpy as np
from PIL import Image

Lectura de matrícula

In [2]:
def pruebas(image):
    # Create a copy of the image to avoid modifying the original
    image_copy = image.copy()

    gray = cv2.cvtColor(image_copy, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)
    gray = cv2.bilateralFilter(gray, 11, 17, 17)

    edged = cv2.Canny(gray, 170, 200)
    cnts, _ = cv2.findContours(edged.copy(), cv2.RECURS_FILTER, cv2.CHAIN_APPROX_SIMPLE)
    cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:30]

    NumberPlateCount = None

    for i in cnts:
        perimeter = cv2.arcLength(i, True)
        approx = cv2.approxPolyDP(i, 0.02 * perimeter, True)
        if len(approx) == 4:
            NumberPlateCount = approx
            x, y, w, h = cv2.boundingRect(i)
            crp_img = image_copy[y:y + h, x:x + w]

            # Guardar la imagen recortada temporalmente
            cv2.imwrite("./media/Output/temp1.png", crp_img)

            # Convierte la imagen umbralizada a texto utilizando pytesseract
            custom_config = r'--psm 6 --oem 3'
            text = pytesseract.image_to_string(crp_img, lang='eng', config=custom_config)

            # Después de obtener el texto de Tesseract
            if text is not None:
                # Filtrar solo caracteres alfanuméricos
                text = re.sub(r'[^a-zA-Z0-9]', ' ', text)
                # Verificar la longitud y otros criterios según el formato de la matrícula
                if len(text) > 0 and len(text) <= 10:
                    # Procesar la matrícula reconocida
                    print(f"Text to be displayed: {text}")
                    cv2.putText(image, text, (x, y - 15), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)
                    print(f"Número de placa en {image_file}: {text}")

            break

    # Dibujar contorno y mostrar el frame
    if NumberPlateCount is not None:
        cv2.drawContours(image, [NumberPlateCount], -1, (0, 255, 0), 3)

    cv2.imshow(f"Vehículos - {image_file}", image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

In [4]:
# Carga del modelo
model = YOLO(r'D:\Mele\University\ULPGC\Asignaturas 23-24\VC\P5\data\yolov8n.pt')

# Nombre de las distintas clases
classNames = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
              "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
              "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
              "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
              "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
              "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
              "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed",
              "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
              "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
              "teddy bear", "hair drier", "toothbrush"
              ]

# Configurar la ubicación de Tesseract OCR
pytesseract.pytesseract.tesseract_cmd = r'D:/Programs/Tesseract/tesseract'

# Ruta de la imagen
#image_path = '98.jpg'
directory_path = r'D:/Mele/University/ULPGC/Asignaturas 23-24/VC/P5/media/Images'

# Listar todos los archivos en el directorio
image_files = [f for f in os.listdir(directory_path) if f.lower().endswith(('.png', '.jpg', '.jpeg', '.gif'))]

# Inicializar el índice de la imagen actual
current_image_index = 0

while current_image_index < len(image_files):
    # Construir la ruta de la imagen actual
    image_file = image_files[current_image_index]
    image_path = os.path.join(directory_path, image_file)

    # Leer la imagen
    img = cv2.imread(image_path)

    # Redimensionar la imagen
    img = imutils.resize(img, width=1000)
    
    # Perform inference on the image
    results = model(img)

    # Para cada detección
    for r in results:
        boxes = r.boxes
        
        for box in boxes:
            # Contenedor
            x1, y1, x2, y2 = box.xyxy[0]
            x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)  # convert to int values

            # Confianza
            confidence = math.ceil((box.conf[0] * 100)) / 100
           
            # Clase
            cls = int(box.cls[0])
   
            # Si la detección es un coche, obtenemos las medidas
            if classNames[cls] == "car" or classNames[cls] == "truck" or classNames[cls] == "train":
                width = x2 - x1
                height = y2 - y1
    
                # Dibuja el contenedor y clase
                cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 3)
                y2 = int(0.25 * height)
                
                # Dibuja el cuadrado con la nueva altura
                cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 3)
                
                cv2.putText(img, f"Car: {width}x{height}", (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

                image = img[y2:y2 + height, x1:x1 + width]
                pruebas(image)
   
    # Muestra la imagen con las detecciones
    cv2.imshow('Image', img)

    # Wait for a key press
    key = cv2.waitKey(0)

    # Move to the next image if right arrow key is pressed
    if (key == ord('d')) and current_image_index < len(image_files) - 1:
        current_image_index += 1
    # Move to the previous image if left arrow key is pressed
    elif (key == ord('a')) and current_image_index > 0:
        current_image_index -= 1
    # Stop if ESC key is pressed
    elif key == 27:
        break
    
cv2.destroyAllWindows()


0: 320x640 1 oven, 49.0ms
Speed: 2.0ms preprocess, 49.0ms inference, 1.0ms postprocess per image at shape (1, 3, 320, 640)

0: 320x640 1 bus, 42.0ms
Speed: 1.0ms preprocess, 42.0ms inference, 1.0ms postprocess per image at shape (1, 3, 320, 640)

0: 384x640 1 car, 49.0ms
Speed: 2.0ms preprocess, 49.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)
Text to be displayed: 83245 LCX 
Número de placa en 98.jpg: 83245 LCX 

0: 448x640 1 car, 65.0ms
Speed: 1.0ms preprocess, 65.0ms inference, 1.0ms postprocess per image at shape (1, 3, 448, 640)
Text to be displayed:   
Número de placa en aa.jpg:   

0: 384x640 2 buss, 47.6ms
Speed: 2.0ms preprocess, 47.6ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 416x640 1 car, 1 truck, 48.0ms
Speed: 1.0ms preprocess, 48.0ms inference, 1.0ms postprocess per image at shape (1, 3, 416, 640)
Text to be displayed: 2801 NLE 
Número de placa en abece.jpg: 2801 NLE 

0: 384x640 1 car, 47.0ms
Speed: 1.0ms preprocess, 47

Video capture

In [4]:
# Configurar la ubicación de Tesseract OCR
pytesseract.pytesseract.tesseract_cmd = r'D:/Programs/Tesseract/tesseract'

# Cargar el video
cap = cv2.VideoCapture(r'D:\Mele\University\ULPGC\Asignaturas 23-24\VC\P5\media\video1.mp4')


while True:
    ret, frame = cap.read()
    if not ret:
        break

    frame = imutils.resize(frame, width=500)

    # Detalles de tu código de detección de placas
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    gray = cv2.bilateralFilter(gray, 11, 17, 17)
    edged = cv2.Canny(gray, 170, 200)
    cnts, _ = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:30]

    NumberPlateCount = None

    for i in cnts:
        perimeter = cv2.arcLength(i, True)
        approx = cv2.approxPolyDP(i, 0.02 * perimeter, True)
        if len(approx) == 4:
            NumberPlateCount = approx
            x, y, w, h = cv2.boundingRect(i)
            crp_img = frame[y:y + h, x:x + w]

            # Guardar la imagen recortada temporalmente
            cv2.imwrite("./media/Output/temp2.png", crp_img)

            # Convierte la imagen umbralizada a texto utilizando pytesseract
            text = pytesseract.image_to_string("./media/Output/temp2.png", lang='eng')

            # Use re.search on the text string
            if text is not None and re.search(r'\d', text) and re.search(r'\w', text):
                # Imprime el texto detectado
                print("Número de placa:", text)

            break

    # Dibujar contorno y mostrar el frame
    if NumberPlateCount is not None:
        cv2.drawContours(frame, [NumberPlateCount], -1, (0, 255, 0), 3)

    cv2.imshow("Vehículos", frame)
    t = cv2.waitKey(1)

    if t == 27:
        break

cap.release()
cv2.destroyAllWindows()

Número de placa: "27


Número de placa: El

63277)

Número de placa: Teaz7lh


Número de placa: ss

63271


Número de placa: +, -—H
1632718

Número de placa: "63271


Número de placa: (622714


Número de placa: 7
tS327T1h


Número de placa: 163271


Número de placa: 632718


Número de placa: 632715

Número de placa: -5-—7
163271k


Número de placa: 5—-—n
163271k


Número de placa: | — —4
1632718


Número de placa: |
163271h


Número de placa: -——1
63271


Número de placa: --——1
63271


Número de placa: ae
632711


Número de placa: "632771 |


Número de placa: 163271

Número de placa: j.——7

163271;


Número de placa: 1632775


Número de placa: arailt
‘63271


Número de placa: 163271



Documentación:
- https://docs.ultralytics.com/
- https://pypi.org/project/pytesseract/
- https://app.roboflow.com/

Descarga dataset

In [None]:
!pip install roboflow

from roboflow import Roboflow
rf = Roboflow(api_key="pheu8JLCiblhXRc114tC")
project = rf.workspace("aa-h8tpn").project("aaa-cnetp")
dataset = project.version(1).download("yolov8")

Verificando CUDA con PyTorch

In [7]:
import torch
print(torch.cuda.is_available())

False


Entrenamiento yolo8

In [6]:
!yolo detect train model="D:\Mele\University\ULPGC\Asignaturas 23-24\VC\P5\data\yolov8n.pt" data="D:\Mele\University\ULPGC\Asignaturas 23-24\VC\P5\data\config.yaml" imgsz=614 device=CPU epochs=100 patience=150

Ultralytics YOLOv8.0.215 🚀 Python-3.11.5 torch-2.1.1 CPU (AMD Ryzen 5 5600X 6-Core Processor)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=D:\Mele\University\ULPGC\Asignaturas 23-24\VC\P5\data\yolov8n.pt, data=D:\Mele\University\ULPGC\Asignaturas 23-24\VC\P5\data\config.yaml, epochs=100, patience=150, batch=16, imgsz=614, save=True, save_period=-1, cache=False, device=CPU, workers=8, project=None, name=train6, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, vid_stride=1, stream_buffer=False, line_width=None, visualize=False, augm



[34m[1mtrain: [0mScanning D:\Mele\University\ULPGC\Asignaturas 23-24\VC\P5\datasets\train\labels.cache... 24 images, 1 backgrounds, 0 corrupt: 100%|██████████| 24/24 [00:00<?, ?it/s]
[34m[1mtrain: [0mScanning D:\Mele\University\ULPGC\Asignaturas 23-24\VC\P5\datasets\train\labels.cache... 24 images, 1 backgrounds, 0 corrupt: 100%|██████████| 24/24 [00:00<?, ?it/s]

[34m[1mval: [0mScanning D:\Mele\University\ULPGC\Asignaturas 23-24\VC\P5\datasets\valid\labels.cache... 7 images, 0 backgrounds, 0 corrupt: 100%|██████████| 7/7 [00:00<?, ?it/s]
[34m[1mval: [0mScanning D:\Mele\University\ULPGC\Asignaturas 23-24\VC\P5\datasets\valid\labels.cache... 7 images, 0 backgrounds, 0 corrupt: 100%|██████████| 7/7 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]
      1/100         0G      1.677       3.39      1.817         47        640:   0%|          | 0/2 [00:05<?, ?it/s]
      1/100         0G      1.677       3.39      1.817         47        640:  50%|█████     | 1/2 [00:05<

Ultralytics YOLOv8.0.215 🚀 Python-3.11.5 torch-2.1.1 CPU (AMD Ryzen 5 5600X 6-Core Processor)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=D:\Mele\University\ULPGC\Asignaturas 23-24\VC\P5\data\yolov8n.pt, data=D:\Mele\University\ULPGC\Asignaturas 23-24\VC\P5\data\config.yaml, epochs=100, patience=150, batch=16, imgsz=614, save=True, save_period=-1, cache=False, device=CPU, workers=8, project=None, name=train7, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, vid_stride=1, stream_buffer=False, line_width=None, visualize=False, augm



[34m[1mtrain: [0mScanning D:\Mele\University\ULPGC\Asignaturas 23-24\VC\P5\datasets\train\labels.cache... 24 images, 1 backgrounds, 0 corrupt: 100%|██████████| 24/24 [00:00<?, ?it/s]
[34m[1mtrain: [0mScanning D:\Mele\University\ULPGC\Asignaturas 23-24\VC\P5\datasets\train\labels.cache... 24 images, 1 backgrounds, 0 corrupt: 100%|██████████| 24/24 [00:00<?, ?it/s]

[34m[1mval: [0mScanning D:\Mele\University\ULPGC\Asignaturas 23-24\VC\P5\datasets\valid\labels.cache... 7 images, 0 backgrounds, 0 corrupt: 100%|██████████| 7/7 [00:00<?, ?it/s]
[34m[1mval: [0mScanning D:\Mele\University\ULPGC\Asignaturas 23-24\VC\P5\datasets\valid\labels.cache... 7 images, 0 backgrounds, 0 corrupt: 100%|██████████| 7/7 [00:00<?, ?it/s]

  0%|          | 0/2 [00:00<?, ?it/s]
      1/100         0G      1.677       3.39      1.817         47        640:   0%|          | 0/2 [00:09<?, ?it/s]
      1/100         0G      1.677       3.39      1.817         47        640:  50%|█████     | 1/2 [00:09<

Evaluación del modelo

In [7]:
!yolo task=detect mode=val model="D:\Mele\University\ULPGC\Asignaturas 23-24\VC\P5\runs\detect\train6\weights\best.pt" data="D:\Mele\University\ULPGC\Asignaturas 23-24\VC\P5\data\config.yaml"

Ultralytics YOLOv8.0.215 🚀 Python-3.11.5 torch-2.1.1 CPU (AMD Ryzen 5 5600X 6-Core Processor)
Model summary (fused): 168 layers, 3005843 parameters, 0 gradients, 8.1 GFLOPs
                   all          7          7      0.827      0.688      0.874       0.56
Speed: 1.3ms preprocess, 67.4ms inference, 0.0ms loss, 4.4ms postprocess per image
Results saved to [1mruns\detect\val2[0m
💡 Learn more at https://docs.ultralytics.com/modes/val



[34m[1mval: [0mScanning D:\Mele\University\ULPGC\Asignaturas 23-24\VC\P5\datasets\valid\labels.cache... 7 images, 0 backgrounds, 0 corrupt: 100%|██████████| 7/7 [00:00<?, ?it/s]
[34m[1mval: [0mScanning D:\Mele\University\ULPGC\Asignaturas 23-24\VC\P5\datasets\valid\labels.cache... 7 images, 0 backgrounds, 0 corrupt: 100%|██████████| 7/7 [00:00<?, ?it/s]

                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95):   0%|          | 0/1 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00,  1.79it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 1/1 [00:00<00:00,  1.79it/s]


In [5]:
# Load the model
model = YOLO('D:\\Mele\\University\\ULPGC\\Asignaturas 23-24\\VC\\P5\\runs\\detect\\train6\\weights\\best.pt')  # Containers

# Class names
classNames = ["license"]

# Folder containing images
image_folder = "D:\\Mele\\University\\ULPGC\\Asignaturas 23-24\\VC\\P5\\media\\Images"
# Get the list of files in the folder
images = os.listdir(image_folder)
# Sort the list of images for consistent order
images.sort()

# Index of the current image
current_image_index = 0

# Iterate over each file in the folder
while current_image_index < len(images):
    # Get the current image file
    img = images[current_image_index]

    # Check if the file is an image (you can adjust the extensions as needed)
    if img.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp')):
        # Build the full path of the image
        image_path = os.path.join(image_folder, img)

        # Read the image using OpenCV
        image = cv2.imread(image_path)

        # Perform inference on an image
        results = model(image, stream=True)

        # For each detection
        for r in results:
            boxes = r.boxes

            for box in boxes:
                # Bounding box coordinates
                x1, y1, x2, y2 = box.xyxy[0]
                x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)  # convert to int values

                # Confidence
                confidence = math.ceil((box.conf[0] * 100)) / 100
                print("Confidence --->", confidence)

                # Class
                cls = int(box.cls[0])
                print("Class name -->", classNames[cls])

                # Convert class numeric identifier to an RGB color
                scale = int((cls / len(classNames)) * 255 * 3)
                if scale >= 255 * 2:
                    R = 255
                    G = 255
                    B = scale - 255 * 2
                else:
                    if scale >= 255:
                        R = 255
                        G = scale - 255
                        B = 0
                    else:
                        R = scale
                        G = 0
                        B = 0

                # Draw bounding box and class name
                cv2.rectangle(image, (x1, y1), (x2, y2), (R, G, B), 3)
                cv2.putText(image, classNames[cls], [x1, y1], cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, B), 2)

        # Show frame
        cv2.imshow('Vid', image)

        # Wait for a key press
        key = cv2.waitKey(0)

        # Move to the next image if right arrow key is pressed
        if (key == ord('d')) and current_image_index < len(images) - 1:
            current_image_index += 1
        # Move to the previous image if left arrow key is pressed
        elif (key == ord('a')) and current_image_index > 0:
            current_image_index -= 1
        # Stop if ESC key is pressed
        elif key == 27:
            break

# Destroy windows
cv2.destroyAllWindows()



Confidence ---> 0.95
Class name --> license
Confidence ---> 0.29
Class name --> license
0: 320x640 2 licenses, 54.0ms
Speed: 3.0ms preprocess, 54.0ms inference, 1.0ms postprocess per image at shape (1, 3, 320, 640)

Confidence ---> 0.94
Class name --> license
Confidence ---> 0.56
Class name --> license
0: 320x640 2 licenses, 42.0ms
Speed: 3.0ms preprocess, 42.0ms inference, 0.0ms postprocess per image at shape (1, 3, 320, 640)

Confidence ---> 0.81
Class name --> license
0: 384x640 1 license, 51.0ms
Speed: 2.0ms preprocess, 51.0ms inference, 0.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 (no detections), 43.0ms
Speed: 2.0ms preprocess, 43.0ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

Confidence ---> 0.87
Class name --> license
0: 448x640 1 license, 49.0ms
Speed: 1.0ms preprocess, 49.0ms inference, 0.0ms postprocess per image at shape (1, 3, 448, 640)

Confidence ---> 0.96
Class name --> license
0: 384x640 1 license, 44.0ms
Speed: 2.0ms prepro

Documentación:
- https://medium.com/@harunijaz/a-step-by-step-guide-to-installing-cuda-with-pytorch-in-conda-on-windows-verifying-via-console-9ba4cd5ccbef
- https://app.roboflow.com/