#Paquetes necesarios

In [1]:
import cv2  
import math 

from ultralytics import YOLO
import pytesseract

Desde cámara, detección con yolov8 y modelo nano. Visualización propia

In [3]:
# Carga del modelo
model = YOLO('yolov8n.pt')  # Contenedores

# Nombre de las distintas clases
classNames = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
              "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
              "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
              "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
              "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
              "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
              "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed",
              "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
              "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
              "teddy bear", "hair drier", "toothbrush"
              ]

# Para un vídeo
filename = "C:/Users/Lenovo/Desktop/video.MOV"
cap = cv2.VideoCapture(filename)

while(cap.isOpened()):
    # fotograma a fotograma
    ret, img = cap.read()

    # si hay imagen válida
    if ret:
        # Perform inference on an image
        results = model(img, stream=True)

        # Para cada detección
        for r in results:
            boxes = r.boxes

            for box in boxes:
                # Contenedor
                x1, y1, x2, y2 = box.xyxy[0]
                x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)  # convert to int values

                # Confianza
                confidence = math.ceil((box.conf[0] * 100)) / 100
                print("Confidence --->", confidence)

                # Clase
                cls = int(box.cls[0])
                print("Class name -->", classNames[cls])

                # Convierte identificador numérico de clase a un color RGB
                escala = int((cls / len(classNames)) * 255 * 3)
                if escala >= 255 * 2:
                    R = 255
                    G = 255
                    B = escala - 255 * 2
                else:
                    if escala >= 255:
                        R = 255
                        G = escala - 255
                        B = 0
                    else:
                        R = escala
                        G = 0
                        B = 0

                # Dibuja el contenedor y clase
                cv2.rectangle(img, (x1, y1), (x2, y2), (R, G, B), 3)
                cv2.putText(img, classNames[cls], [x1, y1], cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, B), 2)

                # Si la clase es un 'car' (vehículo), intenta reconocer la matrícula
                if classNames[cls] == "car":
                    # Recorta la región de la matrícula
                    plate_img = img[y1:y2, x1:x2]

                    # Convierte la imagen a escala de grises
                    gray_plate = cv2.cvtColor(plate_img, cv2.COLOR_BGR2GRAY)

                    # Aplica umbral para resaltar los caracteres
                    _, thresh = cv2.threshold(gray_plate, 120, 255, cv2.THRESH_BINARY)

                    # Utiliza Tesseract para reconocer texto en la imagen
                    text = pytesseract.image_to_string(thresh, config='--psm 8', lang='eng')

                    # Muestra el texto reconocido
                    print("License Plate:", text)

                    # Dibuja el texto sobre la imagen
                    cv2.putText(img, text, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1, cv2.LINE_AA)

        # Redimensionamos el vídeo antes de mostrarlo
        height, width, _ = img.shape
        resized_img = cv2.resize(img, (int(width * 0.50), int(height * 0.50)))

        # Muestra fotograma
        cv2.imshow('Vid', resized_img)

        # Detenemos pulsado ESC
        if cv2.waitKey(20) == 27:
            break
    else:
        break

# Libera el objeto de captura
cap.release()

# Destruye ventanas
cv2.destroyAllWindows()


0: 640x384 3 cars, 97.4ms
Speed: 3.5ms preprocess, 97.4ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)


Confidence ---> 0.83
Class name --> car
Confidence ---> 0.62
Class name --> car
Confidence ---> 0.34
Class name --> car



0: 640x384 3 cars, 101.2ms
Speed: 2.0ms preprocess, 101.2ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)


License Plate: ee

Confidence ---> 0.82
Class name --> car
Confidence ---> 0.64
Class name --> car
Confidence ---> 0.45
Class name --> car



0: 640x384 3 cars, 95.4ms
Speed: 2.8ms preprocess, 95.4ms inference, 0.0ms postprocess per image at shape (1, 3, 640, 384)


License Plate: ae

Confidence ---> 0.75
Class name --> car
Confidence ---> 0.74
Class name --> car
Confidence ---> 0.35
Class name --> car



0: 640x384 3 cars, 98.7ms
Speed: 10.4ms preprocess, 98.7ms inference, 1.8ms postprocess per image at shape (1, 3, 640, 384)


License Plate: +

Confidence ---> 0.84
Class name --> car
Confidence ---> 0.76
Class name --> car
Confidence ---> 0.31
Class name --> car



0: 640x384 2 cars, 83.1ms
Speed: 17.0ms preprocess, 83.1ms inference, 2.0ms postprocess per image at shape (1, 3, 640, 384)


License Plate: | elles

Confidence ---> 0.8
Class name --> car
Confidence ---> 0.7
Class name --> car



0: 640x384 4 cars, 98.1ms
Speed: 2.0ms preprocess, 98.1ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)


License Plate: Ra

Confidence ---> 0.83
Class name --> car
Confidence ---> 0.66
Class name --> car
Confidence ---> 0.26
Class name --> car
Confidence ---> 0.26
Class name --> car



0: 640x384 3 cars, 92.3ms


License Plate: Gia

Confidence ---> 0.81
Class name --> car
Confidence ---> 0.72
Class name --> car
Confidence ---> 0.27
Class name --> car


Speed: 2.0ms preprocess, 92.3ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 3 cars, 83.6ms
Speed: 3.8ms preprocess, 83.6ms inference, 14.3ms postprocess per image at shape (1, 3, 640, 384)


License Plate: las |

Confidence ---> 0.79
Class name --> car
Confidence ---> 0.77
Class name --> car
Confidence ---> 0.42
Class name --> car



0: 640x384 4 cars, 97.4ms
Speed: 4.0ms preprocess, 97.4ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 384)


License Plate: ia

Confidence ---> 0.83
Class name --> car
Confidence ---> 0.75
Class name --> car
Confidence ---> 0.52
Class name --> car
Confidence ---> 0.42
Class name --> car



0: 640x384 4 cars, 1 truck, 86.3ms
Speed: 2.0ms preprocess, 86.3ms inference, 1.7ms postprocess per image at shape (1, 3, 640, 384)


License Plate: a

Confidence ---> 0.84
Class name --> car
Confidence ---> 0.66
Class name --> car
Confidence ---> 0.38
Class name --> car
Confidence ---> 0.38
Class name --> car
Confidence ---> 0.27
Class name --> truck


Diversos modelos preentrenados, visualizando

In [None]:
# Carga del modelo
#model = YOLO('yolov8n.pt') #Contenedores
#model = YOLO('yolov8n-seg.pt') #Máscaras
model = YOLO('yolov8n-pose.pt')  #Pose

#Para un vídeo 
filename = "C:/Users/otsed/Desktop/RUNNERS_ILUSOS/Multimedia/Bibs/TGC23_PdH_C0056_resultado.mp4"
results = model(filename, show=True)

cv2.destroyAllWindows()



Intregración con seguimiento (tracking)
Nota: he tenido que bajar a la versión de python 3.9.5 e instalar lap con pip install lap

In [None]:
# Carga del modelo
model = YOLO('yolov8n.pt') #Contenedores
#model = YOLO('yolov8n-seg.pt') #Máscaras
#model = YOLO('yolov8n-pose.pt')  #Pose

#Para un vídeo 
#filename = "C:/Users/otsed/Desktop/RUNNERS_ILUSOS/Multimedia/Bibs/TGC23_PdH_C0056_resultado.mp4"
filename = "D:/GH010196s.mp4"
results = model.track(source=filename, show=True)  # BoT-SORT tracker (por defecto)
#results = model.track(source=filename, show=True, tracker="bytetrack.yaml")  # ByteTrack tracker

cv2.destroyAllWindows()

Reconocimiento de caracteres tras instalar pytesseract y tesseract

In [None]:
# Tesseract
import cv2
import pytesseract

# Previamente debes descargar los ejecutables
# Si la ruta de Tesseract no está en el PATH, ruta al ejecutable
pytesseract.pytesseract.tesseract_cmd = r'C:/Program Files/Tesseract-OCR/tesseract'

# Lenguajes disponibles
print(pytesseract.get_languages(config=''))

#Cargo imagen y ocnvierto a RGB
img = cv2.imread('toy.tif') 
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

#Aplica reconocedor a imagen cargada
print(pytesseract.image_to_string(img_rgb))

Reconocimiento decaracteres tras instalar easyocr

In [None]:
import easyocr

#Carga del modelo de lengua
reader = easyocr.Reader(['es']) 

#Reconocimiento de una imagen
result = reader.readtext('toy.tif')
print(result)

#Con restricción de caracteres reconocibles
#result = reader.readtext('toy.tif', allowlist ='0123456789')