In [11]:
from datasets import load_dataset

# Load the "Francesco/animals-ij5d2" dataset; without a `split` argument this
# yields a DatasetDict holding the train / validation / test splits.
dataset = load_dataset("Francesco/animals-ij5d2")

# Show each split with its feature names and row count.
print(dataset)

DatasetDict({
    train: Dataset({
        features: ['image_id', 'image', 'width', 'height', 'objects'],
        num_rows: 700
    })
    validation: Dataset({
        features: ['image_id', 'image', 'width', 'height', 'objects'],
        num_rows: 100
    })
    test: Dataset({
        features: ['image_id', 'image', 'width', 'height', 'objects'],
        num_rows: 200
    })
})


In [12]:
# Read the annotations of the first training example.
sample = dataset["train"][0]
annotations = sample["objects"]

# Category entries of that example's objects.
# NOTE(review): `categories` is never read again in this cell.
categories = annotations["category"]

# Number of classes: 10 object classes plus one extra slot for the background.
# NOTE(review): the 10 is hard-coded, not derived from `categories` or from the
# dataset's feature metadata -- confirm it matches the dataset.
num_classes = 10 + 1  # +1 for the background
print(f"Número de clases (con fondo): {num_classes}")


Número de clases (con fondo): 11


In [2]:
from transformers import YolosImageProcessor
import torch

# Load the YOLOS image processor from the "hustvl/yolos-base" checkpoint.
# It is a module-level object: `collate_fn` reads it to preprocess images.
processor = YolosImageProcessor.from_pretrained("hustvl/yolos-base")

# Collate function: turns a list of dataset examples into one model-ready batch.
def collate_fn(batch):
    """Collate dataset examples into pixel values and YOLOS detection targets.

    Images and annotations are handed to the module-level `processor` together,
    so the processor resizes the boxes along with the images and emits targets
    in the layout the Hugging Face YOLOS loss reads (`class_labels` plus
    normalized `boxes`), instead of raw pixel boxes under a `labels` key.

    Args:
        batch: list of dataset examples. Each example is read for its
            "image", "image_id" and "objects" entries, where "objects" holds
            parallel "bbox" and "category" lists.
            NOTE(review): boxes are assumed to be COCO-style
            [x_min, y_min, width, height] in pixels, as the dataset card
            describes -- confirm before training.

    Returns:
        dict with two keys:
            "pixel_values": the processor's image tensor for the whole batch.
            "labels": list with one target mapping per image, as produced by
                the processor.
    """
    images = [example["image"] for example in batch]

    # Re-express every example as a COCO-detection record, the annotation
    # format the image processor accepts.
    annotations = []
    for example in batch:
        objects = example["objects"]
        image_id = example["image_id"]
        coco_objects = [
            {
                "image_id": image_id,
                "category_id": category,
                "bbox": bbox,
                # width * height under the COCO box assumption above; the
                # processor requires the field to be present.
                "area": bbox[2] * bbox[3],
                "iscrowd": 0,
            }
            for bbox, category in zip(objects["bbox"], objects["category"])
        ]
        # An image without objects simply gets an empty annotation list.
        annotations.append({"image_id": image_id, "annotations": coco_objects})

    # One call for the whole batch: the processor resizes, normalizes and pads
    # the images to a common size, so no manual squeeze/stack is needed.
    encoding = processor(images=images, annotations=annotations, return_tensors="pt")

    return {"pixel_values": encoding["pixel_values"], "labels": encoding["labels"]}



  from .autonotebook import tqdm as notebook_tqdm


In [14]:
# Check the shape of a processed image: run the first training example through
# the processor and print the shape of the resulting `pixel_values` tensor.
sample = dataset["train"][0]
processed_image = processor(images=sample["image"], return_tensors="pt")["pixel_values"]
print(processed_image.shape)  # Expected: [1, 3, 800, 800]


torch.Size([1, 3, 800, 800])


In [15]:
# Check the shape after squeeze(0), which drops the leading size-1 batch dimension.
processed_image = processed_image.squeeze(0)
print(processed_image.shape)  # Expected: [3, 800, 800]


torch.Size([3, 800, 800])


In [16]:
from torch.utils.data import DataLoader

# Training loader: shuffled mini-batches of 4 examples from the train split,
# each assembled by collate_fn.
train_dataloader = DataLoader(
    dataset=dataset["train"],
    batch_size=4,
    shuffle=True,
    collate_fn=collate_fn,
)


In [17]:
from transformers import YolosForObjectDetection

# Number of dataset categories the detection head must be able to predict.
# The Hugging Face YOLOS head appends its own "no object" logit and the loss is
# sized from `config.num_labels`, so no manual background slot is added here.
# NOTE(review): 11 is kept from the original cell; confirm it covers every
# category id present in the dataset.
num_classes = 11

# Load the pretrained checkpoint with a classification head sized for this
# dataset. `num_labels` keeps the model config, the head and the loss in
# agreement (overwriting `class_labels_classifier` by hand would leave the
# config at the checkpoint's label count). `ignore_mismatched_sizes=True` lets
# the head weights whose shape changed be freshly initialized while all other
# pretrained weights are loaded.
model = YolosForObjectDetection.from_pretrained(
    "hustvl/yolos-base",
    num_labels=num_classes,
    ignore_mismatched_sizes=True,
)


In [18]:
# `transformers.AdamW` is deprecated (and absent from recent releases); the
# PyTorch optimizer is its documented replacement.
from torch.optim import AdamW

# Select the device (GPU when available) and move the model onto it.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# The recorded run of this cell ended in a CUDA out-of-memory error on a
# 7.75 GiB GPU. Gradient checkpointing recomputes activations during the
# backward pass instead of storing them, trading compute for memory.
# NOTE(review): if memory is still insufficient, lower the DataLoader batch
# size or switch to a smaller YOLOS checkpoint.
model.gradient_checkpointing_enable()

# weight_decay=0.0 matches the default of the deprecated transformers.AdamW
# (torch.optim.AdamW would otherwise default to 0.01).
optimizer = AdamW(model.parameters(), lr=5e-5, weight_decay=0.0)

# Training loop.
num_epochs = 10
model.train()
for epoch in range(num_epochs):
    epoch_loss = 0.0
    num_batches = 0

    for batch in train_dataloader:
        images = batch["pixel_values"].to(device)
        # Each target is a mapping of tensors; move every tensor to the device.
        labels = [{k: v.to(device) for k, v in t.items()} for t in batch["labels"]]

        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass: passing `labels` makes the model return its loss.
        outputs = model(pixel_values=images, labels=labels)
        loss = outputs.loss

        # Backward pass and parameter update.
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        num_batches += 1

    # Report the mean over the epoch rather than only the last batch's loss;
    # the max() guard keeps an empty loader from raising.
    mean_loss = epoch_loss / max(num_batches, 1)
    print(f"Época {epoch + 1}/{num_epochs}, Pérdida media: {mean_loss:.4f}")


OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 7.75 GiB of which 11.94 MiB is free. Including non-PyTorch memory, this process has 7.72 GiB memory in use. Of the allocated memory 7.40 GiB is allocated by PyTorch, and 210.61 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [3]:

# Ask PyTorch's CUDA caching allocator to release cached memory it is not
# currently using. Tensors that are still referenced (e.g. the model) are not
# freed by this call.
torch.cuda.empty_cache()