#Paquetes necesarios

In [1]:
import cv2  
import math 

from ultralytics import YOLO



Desde cámara, detección con yolov8 y modelo nano

In [4]:
# Carga del modelo
model = YOLO('yolov8n.pt')

# Nombre de las distintas clases
classNames = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat",
              "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat",
              "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella",
              "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
              "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
              "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli",
              "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa", "pottedplant", "bed",
              "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone",
              "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
              "teddy bear", "hair drier", "toothbrush"
              ]


# Captura desde lawebcam
vid = cv2.VideoCapture(0)
  
while(True):      
    # fotograma a fotograma
    ret, img = vid.read()
  
    # si hay imagen válida
    if ret:  
        # Perform inference on an image
        results = model(img, stream=True)
        
        # Para cada detección
        for r in results:
            boxes = r.boxes

            for box in boxes:
                # Contenedor
                x1, y1, x2, y2 = box.xyxy[0]
                x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2) # convert to int values
                
                # Confianza
                confidence = math.ceil((box.conf[0]*100))/100
                print("Confidence --->",confidence)

                # Clase
                cls = int(box.cls[0])
                print("Class name -->", classNames[cls])

                # Convierte identificador numérico de clase a un color RGB
                escala = int((cls / len(classNames)) * 255 * 3)
                if escala >= 255*2:
                    R = 255
                    G = 255
                    B = escala - 255*2
                else:
                    if escala >= 255:
                        R = 255
                        G = escala - 255
                        B = 0
                    else:
                        R = escala
                        G = 0
                        B = 0

                # Dibuja el contenedor y clase
                cv2.rectangle(img, (x1, y1), (x2, y2), (R, G, B), 3)
                cv2.putText(img, classNames[cls] , [x1, y1], cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, B), 2)

        # Muestra fotograma
        cv2.imshow('Vid', img)
    
    # Detenemos pulsado ESC
    if cv2.waitKey(20) == 27:
        break
  
# Libera el objeto de captura
vid.release()
# Destruye ventanas
cv2.destroyAllWindows()


0: 480x640 1 person, 343.1ms
Speed: 7.0ms preprocess, 343.1ms inference, 7.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.75
Class name --> person


0: 480x640 1 person, 458.8ms
Speed: 18.0ms preprocess, 458.8ms inference, 7.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.91
Class name --> person


0: 480x640 1 person, 436.8ms
Speed: 6.0ms preprocess, 436.8ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.89
Class name --> person


0: 480x640 1 person, 343.1ms
Speed: 11.0ms preprocess, 343.1ms inference, 7.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.91
Class name --> person


0: 480x640 1 person, 448.8ms
Speed: 4.0ms preprocess, 448.8ms inference, 7.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.89
Class name --> person


0: 480x640 1 person, 392.0ms
Speed: 4.0ms preprocess, 392.0ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.76
Class name --> person


0: 480x640 1 person, 354.1ms
Speed: 9.0ms preprocess, 354.1ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.84
Class name --> person


0: 480x640 1 person, 362.0ms
Speed: 8.0ms preprocess, 362.0ms inference, 13.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.84
Class name --> person


0: 480x640 1 person, 423.9ms
Speed: 8.0ms preprocess, 423.9ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.82
Class name --> person


0: 480x640 1 person, 311.2ms
Speed: 5.0ms preprocess, 311.2ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.82
Class name --> person


0: 480x640 1 person, 276.3ms
Speed: 4.0ms preprocess, 276.3ms inference, 7.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.8
Class name --> person


0: 480x640 1 person, 280.3ms
Speed: 3.0ms preprocess, 280.3ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.79
Class name --> person


0: 480x640 1 person, 275.3ms
Speed: 6.0ms preprocess, 275.3ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.83
Class name --> person


0: 480x640 1 person, 262.3ms
Speed: 4.0ms preprocess, 262.3ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.84
Class name --> person


0: 480x640 1 person, 388.0ms
Speed: 6.0ms preprocess, 388.0ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.86
Class name --> person


0: 480x640 1 person, 337.1ms
Speed: 9.0ms preprocess, 337.1ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.84
Class name --> person


0: 480x640 1 person, 239.4ms
Speed: 5.0ms preprocess, 239.4ms inference, 8.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.82
Class name --> person


0: 480x640 1 person, 260.3ms
Speed: 3.0ms preprocess, 260.3ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.83
Class name --> person


0: 480x640 1 person, 253.3ms
Speed: 3.0ms preprocess, 253.3ms inference, 9.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.8
Class name --> person


0: 480x640 1 person, 249.3ms
Speed: 7.0ms preprocess, 249.3ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.86
Class name --> person


0: 480x640 1 person, 285.2ms
Speed: 5.0ms preprocess, 285.2ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.86
Class name --> person


0: 480x640 1 person, 269.3ms
Speed: 9.0ms preprocess, 269.3ms inference, 7.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.81
Class name --> person


0: 480x640 1 person, 228.4ms
Speed: 2.0ms preprocess, 228.4ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.72
Class name --> person


0: 480x640 1 person, 226.4ms
Speed: 3.0ms preprocess, 226.4ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.72
Class name --> person


0: 480x640 1 person, 349.1ms
Speed: 4.0ms preprocess, 349.1ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.8
Class name --> person


0: 480x640 1 person, 1 sports ball, 232.4ms
Speed: 4.0ms preprocess, 232.4ms inference, 6.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.89
Class name --> person
Confidence ---> 0.27
Class name --> sports ball


0: 480x640 1 person, 235.4ms
Speed: 2.0ms preprocess, 235.4ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.83
Class name --> person


0: 480x640 1 person, 237.4ms
Speed: 4.0ms preprocess, 237.4ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.77
Class name --> person


0: 480x640 1 person, 1 toothbrush, 247.3ms
Speed: 3.0ms preprocess, 247.3ms inference, 9.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.8
Class name --> person
Confidence ---> 0.35
Class name --> toothbrush


0: 480x640 1 person, 1 toothbrush, 244.3ms
Speed: 2.0ms preprocess, 244.3ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.82
Class name --> person
Confidence ---> 0.26
Class name --> toothbrush


0: 480x640 1 person, 1 toothbrush, 236.4ms
Speed: 4.0ms preprocess, 236.4ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.78
Class name --> person
Confidence ---> 0.36
Class name --> toothbrush


0: 480x640 1 person, 250.3ms
Speed: 5.0ms preprocess, 250.3ms inference, 7.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.76
Class name --> person


0: 480x640 1 person, 1 toothbrush, 326.1ms
Speed: 5.0ms preprocess, 326.1ms inference, 7.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.77
Class name --> person
Confidence ---> 0.35
Class name --> toothbrush


0: 480x640 1 person, 1 toothbrush, 244.3ms
Speed: 4.0ms preprocess, 244.3ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.78
Class name --> person
Confidence ---> 0.34
Class name --> toothbrush


0: 480x640 1 person, 235.4ms
Speed: 3.0ms preprocess, 235.4ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.76
Class name --> person


0: 480x640 1 person, 277.3ms
Speed: 66.8ms preprocess, 277.3ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.91
Class name --> person


0: 480x640 1 person, 225.4ms
Speed: 3.0ms preprocess, 225.4ms inference, 6.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.92
Class name --> person


0: 480x640 1 person, 262.3ms
Speed: 4.0ms preprocess, 262.3ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.86
Class name --> person


0: 480x640 1 person, 223.4ms
Speed: 3.0ms preprocess, 223.4ms inference, 8.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.88
Class name --> person


0: 480x640 1 person, 268.3ms
Speed: 5.0ms preprocess, 268.3ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.83
Class name --> person


0: 480x640 1 person, 230.4ms
Speed: 2.0ms preprocess, 230.4ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.82
Class name --> person


0: 480x640 1 person, 246.3ms
Speed: 4.0ms preprocess, 246.3ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.82
Class name --> person


0: 480x640 1 person, 235.4ms
Speed: 9.0ms preprocess, 235.4ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.85
Class name --> person


0: 480x640 1 person, 249.3ms
Speed: 2.0ms preprocess, 249.3ms inference, 2.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.84
Class name --> person


0: 480x640 1 person, 237.4ms
Speed: 4.0ms preprocess, 237.4ms inference, 8.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.85
Class name --> person


0: 480x640 1 person, 1 cell phone, 265.3ms
Speed: 7.0ms preprocess, 265.3ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.87
Class name --> cell phone
Confidence ---> 0.81
Class name --> person


0: 480x640 1 person, 1 cell phone, 353.1ms
Speed: 29.9ms preprocess, 353.1ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.87
Class name --> cell phone
Confidence ---> 0.72
Class name --> person


0: 480x640 1 person, 1 cell phone, 293.2ms
Speed: 10.0ms preprocess, 293.2ms inference, 6.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.79
Class name --> person
Confidence ---> 0.77
Class name --> cell phone


0: 480x640 1 person, 1 cell phone, 272.3ms
Speed: 5.0ms preprocess, 272.3ms inference, 7.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.87
Class name --> cell phone
Confidence ---> 0.7
Class name --> person


0: 480x640 1 person, 1 cell phone, 293.2ms
Speed: 4.0ms preprocess, 293.2ms inference, 7.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.86
Class name --> cell phone
Confidence ---> 0.76
Class name --> person


0: 480x640 1 person, 325.1ms
Speed: 7.0ms preprocess, 325.1ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.88
Class name --> person


0: 480x640 1 person, 289.2ms
Speed: 5.0ms preprocess, 289.2ms inference, 4.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.86
Class name --> person


0: 480x640 1 person, 258.3ms
Speed: 4.0ms preprocess, 258.3ms inference, 8.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.82
Class name --> person


0: 480x640 1 person, 347.1ms
Speed: 5.0ms preprocess, 347.1ms inference, 8.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.87
Class name --> person


0: 480x640 1 person, 1 tv, 588.4ms
Speed: 6.0ms preprocess, 588.4ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.85
Class name --> person
Confidence ---> 0.27
Class name --> tvmonitor


0: 480x640 1 person, 483.7ms
Speed: 7.0ms preprocess, 483.7ms inference, 7.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.89
Class name --> person


0: 480x640 1 person, 270.3ms
Speed: 9.0ms preprocess, 270.3ms inference, 3.0ms postprocess per image at shape (1, 3, 480, 640)



Confidence ---> 0.84
Class name --> person


0: 480x640 1 person, 412.9ms
Speed: 3.0ms preprocess, 412.9ms inference, 5.0ms postprocess per image at shape (1, 3, 480, 640)


Confidence ---> 0.84
Class name --> person


Reconocimiento de caracteres tras instalar pytesseract y tesseract

In [4]:
# Tesseract
import cv2
import pytesseract

# Previamente debes descargar los ejecutables
# Si la ruta de Tesseract no está en el PATH, ruta al ejecutable
pytesseract.pytesseract.tesseract_cmd = r'C:/Program Files/Tesseract-OCR/tesseract'

# Lenguajes disponibles
print(pytesseract.get_languages(config=''))

#Cargo imagen y ocnvierto a RGB
img = cv2.imread('toy.tif') 
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

#Aplica reconocedor a imagen cargada
print(pytesseract.image_to_string(img_rgb))

['eng', 'osd']
Hasta el infinito y mas alla



Reconocimiento decaracteres tras instalar easyocr

In [6]:
import easyocr

#Carga del modelo de lengua
reader = easyocr.Reader(['es']) 

#Reconocimiento de una imagen
result = reader.readtext('toy.tif')
print(result)

#Con restricción de caracteres reconocibles
#result = reader.readtext('toy.tif', allowlist ='0123456789')

Neither CUDA nor MPS are available - defaulting to CPU. Note: This module is much faster with a GPU.


[([[49, 85], [617, 85], [617, 147], [49, 147]], 'Hasta el infinito y más allá', 0.6744627670805162)]
