# Entrenamiento en Google Colab

In [None]:
import kagglehub

# Dataset identifier handed to kagglehub.dataset_download.
dataset_handle = "viswaprakash1990/garbage-detection"

# `path` is whatever dataset_download returns; other cells append the
# dataset folder name and the data.yaml location to it.
path = kagglehub.dataset_download(dataset_handle)
print("Path to dataset files:", path)

In [None]:
import os
import re
from collections import defaultdict

# Dataset root inside the download directory; the train/valid/test split
# folders are looked up beneath it. Built with os.path.join so it matches
# how the per-split paths are assembled further down.
BASE_DIR = os.path.join(path, "GARBAGE CLASSIFICATION")

def extract_class(filename):
    """
    Return the class name encoded at the start of a file name.

    The extension is dropped and the leading run of ASCII letters in the
    remaining stem is returned upper-cased. When the stem does not start
    with a letter, "UNKNOWN" is returned instead.
    """
    stem, _extension = os.path.splitext(filename)
    leading_letters = re.match(r"([a-zA-Z]+)", stem)
    if leading_letters is None:
        return "UNKNOWN"
    return leading_letters.group(1).upper()


def _tally_classes(directory, extensions):
    """Count files in *directory* whose name ends in *extensions*, per class."""
    tally = defaultdict(int)

    # A missing path contributes nothing. isdir (rather than exists) also
    # covers a path that is a regular file, which os.listdir would reject
    # with NotADirectoryError.
    if not os.path.isdir(directory):
        return tally

    for entry in os.listdir(directory):
        # Extension matching is case-insensitive.
        if entry.lower().endswith(extensions):
            tally[extract_class(entry)] += 1

    return tally


def count_classes(images_dir, labels_dir):
    """
    Count image files and label files per class.

    The class of each file is derived from its name via extract_class.

    Args:
        images_dir: Directory scanned for .jpg, .jpeg and .png files.
        labels_dir: Directory scanned for .txt files.

    Returns:
        A pair of defaultdict(int) mappings, (image count per class,
        label count per class). A directory that is missing, or is not a
        directory at all, yields an empty mapping.
    """
    img_count = _tally_classes(images_dir, (".jpg", ".jpeg", ".png"))
    lbl_count = _tally_classes(labels_dir, (".txt",))
    return img_count, lbl_count


# =======================
#    COUNT EVERYTHING
# =======================

splits = ["train", "valid", "test"]

# Maps each split name to the (image counts, label counts) pair that
# count_classes returns for that split's folders.
results = {}

for split in splits:
    split_root = os.path.join(BASE_DIR, split)
    img_dir = os.path.join(split_root, "images")
    lbl_dir = os.path.join(split_root, "labels")
    results[split] = count_classes(img_dir, lbl_dir)


# =======================
#    PRINT RESULTS
# =======================

print("\n========== RESUMEN COMPLETO ==========\n")

for split in splits:
    print(f"===== {split.upper()} =====")

    img_count, lbl_count = results[split]
    sections = (
        ("\n-- Imágenes por clase --", img_count),
        ("\n-- Labels por clase --", lbl_count),
    )
    for heading, per_class in sections:
        print(heading)
        # Entries are printed in sorted order of class name.
        for cls, count in sorted(per_class.items()):
            print(f"  {cls}: {count}")

    print("\n-------------------------------------\n")

print("=======================================\n")


In [None]:
from ultralytics import YOLO

# Build the model from the "yolov8s.pt" weights file.
model = YOLO("yolov8s.pt")

# Dataset description file inside the downloaded dataset folder.
data_yaml = path + "/GARBAGE CLASSIFICATION/data.yaml"

# NOTE(review): runs are written under /content/drive/MyDrive, so presumably
# Google Drive has to be mounted first; no mount call is visible in this
# file. Confirm before launching a long run.
train_settings = dict(
    data=data_yaml,
    epochs=100,
    imgsz=640,
    batch=16,
    workers=2,
    patience=20,
    optimizer="AdamW",
    lr0=0.001,
    cos_lr=True,
    cache="disk",
    amp=True,
    project="/content/drive/MyDrive/yolo_runs",
    name="garbage_yolov8s",
)

# Note: this rebinds `results`, the name the class-counting cell also uses.
results = model.train(**train_settings)

# Export the model once per deployment format.
for export_format in ("onnx", "torchscript"):
    model.export(format=export_format)

print("Entrenamiento completo.")