In [2]:
pip install opencv-python

Collecting opencv-python
  Using cached opencv-python-4.11.0.86.tar.gz (95.2 MB)
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Installing backend dependencies ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Building wheels for collected packages: opencv-python
  Building wheel for opencv-python (pyproject.toml) ... [?25l/^C
Note: you may need to restart the kernel to use updated packages.


In [None]:
pip install torch torchvision torchaudio \
    opencv-python pillow numpy groundingdino \
    transformers tqdm pycocotools

In [None]:
pip install git+https://github.com/IDEA-Research/GroundingDINO.git


In [1]:
import os
import cv2
import torch
import numpy as np
from pathlib import Path
from PIL import Image
from time import time
from typing import Any
from GroundingDINO.groundingdino.util.inference import load_model, load_image, predict, annotate
import GroundingDINO.groundingdino.datasets.transforms as T
from database.read_database import ReadImages


class AutoLabellingObjectDetect:
    """Auto-label images with Grounding DINO and save image/label pairs.

    Images are obtained from ``ReadImages``; each one is run through the
    Grounding DINO ``predict`` helper with ``self.prompt``. For every image
    with at least one detection, the image and a text file with one
    ``"<class_id> <xc> <yc> <w> <h>"`` line per box are written to
    ``self.out_path`` (when ``self.save`` is true).
    """

    def __init__(self):
        self.data = ReadImages()

        # Progress / bookkeeping
        self.cont: int = 0          # index of the next image to process
        self.num_images: int = 0    # filled in by main()
        self.class_id: int = 2      # class index written at the start of each label line

        # Detection thresholds forwarded to predict()
        self.box_threshold: float = 0.38
        self.text_threshold: float = 0.25

        self.out_path = Path("img/")
        self.prompt: str = "ductos de cables"  # Changed from 'face' to 'ductos de cables'
        self.home = Path.cwd()

        # Inference device; config_grounding_model() re-evaluates this.
        self.device: str = "cuda" if torch.cuda.is_available() else "cpu"

        self.save: bool = True      # write image + label file for each detection set
        self.draw: bool = False     # show annotated detections in an OpenCV window

        self.images: list = []
        self.names: list = []
        self.bbox_info: list = []

        # Create the output directory if it does not exist yet.
        self.out_path.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _box_to_yolo(class_id: int, box: Any, width: int, height: int) -> tuple:
        """Convert one detection box into a label line and pixel corners.

        Args:
            class_id: Class index written at the start of the label line.
            box: Iterable of four numbers (plain floats or 0-dim tensors),
                treated as normalised (x-center, y-center, width, height).
                # NOTE(review): layout inferred from how the values are used
                # here; confirm against GroundingDINO's predict().
            width: Image width in pixels.
            height: Image height in pixels.

        Returns:
            ``(line, (x1, y1, x2, y2))`` where ``line`` is the label text
            (newline-terminated) and the tuple holds the top-left and
            bottom-right pixel corners, clipped to the image bounds.
        """
        # Clamp each component to [0, 1]; float() unwraps 0-dim tensors.
        xc, yc, bw, bh = (max(0, min(float(v), 1)) for v in box)
        line = f"{class_id} {xc} {yc} {bw} {bh}\n"

        # Center/size -> corner coordinates in pixels.
        x1 = max(0, int((xc - bw / 2) * width))
        y1 = max(0, int((yc - bh / 2) * height))
        x2 = min(width, int((xc + bw / 2) * width))
        y2 = min(height, int((yc + bh / 2) * height))
        return line, (x1, y1, x2, y2)

    def save_data(self, image_copy: np.ndarray, list_info: list):
        """Save an image and its bounding-box label lines side by side.

        Both files share a millisecond-timestamp stem inside
        ``self.out_path`` (``<ts>.jpg`` and ``<ts>.txt``).

        Args:
            image_copy: Image array handed to ``cv2.imwrite``.
            list_info: Label lines (each already newline-terminated).
        """
        time_now = str(int(time() * 1000))  # timestamp in milliseconds
        img_path = self.out_path / f"{time_now}.jpg"
        txt_path = self.out_path / f"{time_now}.txt"

        # cv2.imwrite reports failure through its return value instead of
        # raising; skip the label file so no orphan annotation is left behind.
        if not cv2.imwrite(str(img_path), image_copy):
            print(f"No se pudo guardar la imagen: {img_path}")
            return

        with open(txt_path, "w") as f:
            f.writelines(list_info)

    def config_grounding_model(self) -> Any:
        """Load the Grounding DINO model and select the inference device.

        Config and checkpoint paths are resolved relative to ``self.home``.
        Sets ``self.device`` to ``"cuda"`` when available, else ``"cpu"``.

        Returns:
            The object returned by ``load_model``.
        """
        config_path = self.home / "GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py"
        check_point_path = self.home / "GroundingDINO/weights/groundingdino_swint_ogc.pth"

        # Use CUDA when it is available.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        model = load_model(str(config_path), str(check_point_path), device=self.device)
        return model

    def main(self):
        """Run detection and automatic labelling over every untagged image."""
        self.images, self.names = self.data.read_images("database/untagged_images")
        self.num_images = len(self.images)
        grounding_model = self.config_grounding_model()

        # The preprocessing pipeline does not depend on the image, so it is
        # built once instead of on every iteration.
        transform = T.Compose([
            T.RandomResize([800], max_size=1333),
            T.ToTensor(),
            T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

        while self.cont < self.num_images:
            self.bbox_info = []
            image_name = self.names[self.cont]
            print("-" * 40)
            print(f"Procesando imagen: {image_name}")

            process_image = self.images[self.cont]
            copy_image = process_image.copy()
            draw_image = process_image.copy()

            # NOTE(review): if ReadImages loads with cv2.imread the array is
            # BGR and is passed on without channel reordering — confirm.
            img_source = Image.fromarray(process_image).convert("RGB")
            img_transform, _ = transform(img_source, None)

            boxes, logits, phrases = predict(
                model=grounding_model,
                image=img_transform,
                caption=self.prompt,
                box_threshold=self.box_threshold,
                text_threshold=self.text_threshold,
                device=self.device
            )

            # `not boxes` raises on multi-element tensors; test the number
            # of detections explicitly.
            if len(boxes) == 0:
                print(f"No se encontraron detecciones en {image_name}")
                self.cont += 1
                continue

            # shape[:2] also works for single-channel (2-D) images.
            h, w = process_image.shape[:2]
            for box in boxes:
                line, (x1, y1, x2, y2) = self._box_to_yolo(self.class_id, box, w, h)
                self.bbox_info.append(line)
                print(f"Caja detectada: x1={x1}, y1={y1}, x2={x2}, y2={y2}")

            if self.save:
                self.save_data(copy_image, self.bbox_info)

            if self.draw:
                annotated_img = annotate(image_source=draw_image, boxes=boxes, logits=logits, phrases=phrases)
                out_frame = cv2.cvtColor(annotated_img, cv2.COLOR_BGR2RGB)
                cv2.imshow("Grounding DINO detect", out_frame)
                cv2.waitKey(0)

            self.cont += 1


if __name__ == "__main__":
    # Build the labeller (its constructor creates the output directory) and
    # process every image returned by ReadImages.
    auto_labeling = AutoLabellingObjectDetect()
    auto_labeling.main()


ModuleNotFoundError: No module named 'cv2'