# Definir variables

In [1]:
from autodistill.detection import CaptionOntology

# Video file that the frames are extracted from.
INPUT_VIDEO = "./videos/casa_de_cambio_2-20250901_mix_hasta_1030.mp4"
# Directory where the frames extracted from the video are saved.
FRAMES_DIR = "./frames"
# Directory where the dataset generated by autodistill is saved.
DATASET_DIR = "./dataset"
# Rate, in frames per second, at which frames are extracted.
FRAME_RATE = 1

# Ontology used to create the classes:
#   {"what the model will look for": "final class name"}
ontology = CaptionOntology(
    {
        "vehicle": "vehicle",
        "person": "person",
    }
)

# Extraer frames del video en alta calidad

In [2]:
import subprocess
import shutil
from pathlib import Path

# Validate the configuration up front so that a bad setting fails here, with a
# specific message, rather than later inside the ffmpeg call.
if not INPUT_VIDEO:
    raise ValueError("Set INPUT_VIDEO to the path of the video file.")
if not FRAMES_DIR:
    raise ValueError("Set FRAMES_DIR to the output directory for frames.")
if not FRAME_RATE or FRAME_RATE <= 0:
    raise ValueError("FRAME_RATE must be a positive number.")
if shutil.which("ffmpeg") is None:
    raise EnvironmentError("ffmpeg is not installed or not in PATH.")

in_path = Path(INPUT_VIDEO)
# Check that the video exists *before* touching the filesystem; otherwise a
# typo in INPUT_VIDEO would still create an empty per-video frames directory.
if not in_path.is_file():
    raise FileNotFoundError(f"Input video not found: {in_path}")

# Each video gets its own sub-directory, named after the video file's stem.
out_dir = Path(FRAMES_DIR) / in_path.stem
out_dir.mkdir(parents=True, exist_ok=True)

In [None]:
output_pattern = str(out_dir / "frame_%06d.png")  # PNG for lossless (max quality)

# Remove frames left by a previous extraction into the same directory.
# ffmpeg's -y only overwrites the files it writes again, so a re-run that
# produces fewer frames (lower FRAME_RATE, shorter video) would otherwise
# leave stale higher-numbered frames behind to be labeled later.
for stale_frame in out_dir.glob("frame_*.png"):
    stale_frame.unlink()

cmd = [
    "ffmpeg",
    "-hide_banner",
    "-hwaccel", "cuda",          # request CUDA hardware-accelerated decoding
    "-loglevel", "error",        # only print errors
    "-i", str(in_path),
    "-vf", f"fps={FRAME_RATE}",  # sample the video at FRAME_RATE frames per second
    # NOTE(review): newer ffmpeg releases deprecate -vsync in favour of
    # -fps_mode; kept as-is for compatibility with older builds — confirm
    # against the installed ffmpeg version.
    "-vsync", "0",
    "-y",                        # overwrite existing output files without asking
    output_pattern,
]

# check=True raises CalledProcessError when ffmpeg exits with a non-zero status.
subprocess.run(cmd, check=True)

# ffmpeg can exit successfully without writing any frame; fail loudly here so
# the labeling step is not run on an empty directory.
if not any(out_dir.glob("frame_*.png")):
    raise RuntimeError(f"ffmpeg produced no frames in: {out_dir}")
print(f"Frames extracted to: {out_dir}")

# Etiquetar usando Grounding DINO

In [3]:
from pathlib import Path

from autodistill_grounding_dino import GroundingDINO
import torch

# Labeling is only run on GPU; fail early with a clear message otherwise.
if not torch.cuda.is_available():
    raise EnvironmentError("CUDA is not available on this machine.")

torch.cuda.set_device(0)
# The previous `torch.set_default_device("cuda")` call was removed on purpose:
# it is a process-wide override that changes where every tensor factory call
# allocates (in this cell and in all later cells), and its failure was being
# swallowed by a silent `except AttributeError: pass`.
# NOTE(review): the captured run of this cell died with "CUDA error: an illegal
# memory access"; the global override is a possible contributor — TODO confirm
# by re-running with CUDA_LAUNCH_BLOCKING=1.
# NOTE(review): the autodistill GroundingDINO wrapper presumably selects the
# CUDA device on its own when one is available — verify against the installed
# autodistill_grounding_dino version.

# Derive both folders from the notebook configuration instead of hard-coding
# "./frames" and "./dataset", so changing FRAMES_DIR / DATASET_DIR takes effect.
frames_path = out_dir
dataset_path = Path(DATASET_DIR) / out_dir.name

base_model = GroundingDINO(ontology=ontology)

print(f"Etiquetando frames en {frames_path.as_posix()}")

# Paths are passed as forward-slash strings, matching the form of the
# previously hard-coded folder arguments.
base_model.label(
    input_folder=frames_path.as_posix(),
    extension=".png",
    output_folder=dataset_path.as_posix(),
)

print(f"Dataset creado en: {dataset_path.as_posix()}")

Importing from timm.models.layers is deprecated, please import via timm.layers
torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at C:\actions-runner\_work\pytorch\pytorch\pytorch\aten\src\ATen\native\TensorShape.cpp:4324.)


trying to load grounding dino directly
final text_encoder_type: bert-base-uncased
Etiquetando frames en ./frames/casa_de_cambio_2-20250901_mix_hasta_1030


Labeling ./frames/casa_de_cambio_2-20250901_mix_hasta_1030\frame_000001.png:   0%|          | 0/748 [00:00<?, ?it/s]The `device` argument is deprecated and will be removed in v5 of Transformers.
torch.utils.checkpoint: the use_reentrant parameter should be passed explicitly. In version 2.5 we will raise an exception if use_reentrant is not passed. use_reentrant=False is recommended, but if you need to preserve the current default behavior, you can pass use_reentrant=True. Refer to docs for more details on the differences between the two variants.
None of the inputs have requires_grad=True. Gradients will be None
`torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.
Labeling ./frames/casa_de_cambio_2-20250901_mix_hasta_1030\frame_000002.png:   0%|          | 1/748 [00:02<36:00,  2.89s/it]


AcceleratorError: CUDA error: an illegal memory access was encountered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
