<a href="https://colab.research.google.com/github/paubereon/proyecto_cienciadatos/blob/main/PRUEBA_1_PROYECTO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install ultralytics roboflow opencv-python




In [None]:
!pip install numpy==1.23.5




In [None]:
import os
import shutil
from google.colab import drive
import kagglehub
from ultralytics import YOLO
import numpy as np
import torch
import ultralytics

# Mount Google Drive (optional; lets results be saved to Drive).
drive.mount('/content/drive', force_remount=True)

def setup_environment():
    """Print the installed NumPy, PyTorch and Ultralytics versions.

    This is a report only: nothing is checked or installed here.
    """
    report_lines = [
        "Versiones instaladas:",
        f"- Numpy: {np.__version__}",
        f"- Torch: {torch.__version__}",
        f"- Ultralytics: {ultralytics.__version__}",
    ]
    print("\n".join(report_lines))

def download_datasets():
    """Fetch the two Kaggle fruit-detection datasets through kagglehub.

    Returns:
        A 2-tuple with the values returned by ``kagglehub.dataset_download``
        for each dataset, in request order.

    Raises:
        Exception: Any failure during the download is printed and then
            re-raised unchanged.
    """
    print("Descargando datasets...")
    dataset_handles = (
        "kvnpatel/fruits-vegetable-detection-for-yolov4",
        "kapturovalexander/fruits-by-yolo-fruits-detection",
    )
    try:
        first_dir, second_dir = [
            kagglehub.dataset_download(handle) for handle in dataset_handles
        ]
        print(f"Datasets descargados en:\n- {first_dir}\n- {second_dir}")
    except Exception as err:
        # Report the failure for the notebook user, then let the caller decide.
        print(f"Error al descargar datasets: {err}")
        raise
    return first_dir, second_dir

def organize_dataset(dataset_paths, base_dir="/content/datasets"):
    """Flatten several downloaded datasets into one YOLO-style directory.

    Each path is walked recursively. Files ending in .jpg/.jpeg/.png
    (case-insensitive) are moved to ``<base_dir>/images`` and files ending
    in ``.txt`` are moved to ``<base_dir>/labels``. Files are *moved*, not
    copied, so they disappear from the source trees.

    Because every dataset lands in the same flat directories, a file name
    used by two datasets would otherwise be silently overwritten. To avoid
    that, a stem already claimed by an earlier dataset is prefixed with
    ``ds<index>_`` (index = position in ``dataset_paths``) for the later
    dataset. The prefix depends only on the stem and the dataset index, so
    an image and its label file are always renamed identically.

    Args:
        dataset_paths: Iterable of directories to scan.
        base_dir: Destination root; created if it does not exist.

    Returns:
        Tuple ``(base_dir, images_dir, labels_dir)``.
    """
    yolo_images_dir = os.path.join(base_dir, "images")
    yolo_labels_dir = os.path.join(base_dir, "labels")

    os.makedirs(yolo_images_dir, exist_ok=True)
    os.makedirs(yolo_labels_dir, exist_ok=True)

    image_suffixes = (".jpg", ".jpeg", ".png")
    # Maps each destination stem to the index of the dataset that owns it.
    stem_owner = {}

    def _target_name(dataset_index, filename):
        """Return a destination file name that no other dataset has claimed."""
        stem, suffix = os.path.splitext(filename)
        candidate = stem
        # setdefault claims a free stem; a stem owned by another dataset
        # forces another prefix round.
        while stem_owner.setdefault(candidate, dataset_index) != dataset_index:
            candidate = f"ds{dataset_index}_{candidate}"
        return candidate + suffix

    for dataset_index, dataset_path in enumerate(dataset_paths):
        for root, _, files in os.walk(dataset_path):
            for file in files:
                if file.lower().endswith(image_suffixes):
                    destination_dir = yolo_images_dir
                elif file.endswith(".txt"):
                    destination_dir = yolo_labels_dir
                else:
                    continue
                # NOTE(review): duplicate names *within* one dataset (e.g. in
                # separate train/valid folders) still map to the same target.
                shutil.move(
                    os.path.join(root, file),
                    os.path.join(destination_dir, _target_name(dataset_index, file)),
                )

    print(f"¡Dataset organizado en {base_dir}!")
    return base_dir, yolo_images_dir, yolo_labels_dir

def create_yaml_config(dataset_dir, label_dir):
    """Write a ``data.yaml`` dataset description for YOLO training.

    The class ids are discovered by reading the first token of every line
    of every ``.txt`` file in ``label_dir``. Tokens that are not
    non-negative integers are ignored, so stray text files (class lists,
    READMEs) in the label directory no longer abort the run.

    ``nc`` is ``max(class id) + 1`` and ``names`` holds one placeholder
    name per index ``0 .. nc-1``, so every id found in the labels is a
    valid index even when ids are sparse or do not start at 0. For ids that
    are contiguous from 0 the generated file is identical to the previous
    output.

    Args:
        dataset_dir: Dataset root; written as ``path`` and used as the
            location of ``data.yaml``.
        label_dir: Directory containing the label ``.txt`` files.

    Returns:
        Path of the written ``data.yaml`` file.
    """
    def get_classes():
        """Collect the distinct integer class ids found in the label files."""
        class_ids = set()
        for label_file in os.listdir(label_dir):
            if not label_file.endswith(".txt"):
                continue
            label_path = os.path.join(label_dir, label_file)
            with open(label_path, "r", encoding="utf-8", errors="replace") as f:
                for line in f:
                    fields = line.split()
                    if not fields:
                        continue  # blank line
                    try:
                        class_id = int(fields[0])
                    except ValueError:
                        continue  # not a label row (e.g. a class-name file)
                    if class_id >= 0:
                        class_ids.add(class_id)
        return sorted(class_ids)

    class_ids = get_classes()
    # Size the name table by the largest id, not by the number of distinct ids.
    class_count = class_ids[-1] + 1 if class_ids else 0
    classes = [str(index) for index in range(class_count)]
    yaml_path = os.path.join(dataset_dir, "data.yaml")

    # train/val/test deliberately point at the same image folder.
    config_content = f"""path: {dataset_dir}
train: images
val: images
test: images  # Usamos el mismo conjunto para simplificar

nc: {len(classes)}
names: {classes}
"""

    with open(yaml_path, "w", encoding="utf-8") as f:
        f.write(config_content)

    print(f"Archivo de configuración creado en {yaml_path}")
    return yaml_path

def train_model(yaml_path, epochs=50, imgsz=640, model_name="yolov8n.pt"):
    """Train a YOLO model on the dataset described by ``yaml_path``.

    Args:
        yaml_path: Path to the dataset YAML file; must exist.
        epochs: Number of epochs passed to the trainer.
        imgsz: Image size passed to the trainer.
        model_name: Weights/model identifier handed to ``YOLO``; the text
            before its first dot also names the experiment folder.

    Returns:
        Whatever ``model.train`` returns.

    Raises:
        FileNotFoundError: If ``yaml_path`` does not exist.
    """
    config_missing = not os.path.exists(yaml_path)
    if config_missing:
        raise FileNotFoundError(f"No se encontró el archivo de configuración: {yaml_path}")

    detector = YOLO(model_name)

    # Use the first CUDA device when one is available, otherwise the CPU.
    if torch.cuda.is_available():
        target_device = "0"
    else:
        target_device = "cpu"

    training_options = {
        "data": yaml_path,
        "epochs": epochs,
        "imgsz": imgsz,
        "batch": 16,
        "patience": 10,
        "device": target_device,
        "project": "fruit_detection",
        "name": f"exp_{model_name.split('.')[0]}",
    }
    return detector.train(**training_options)

def main():
    """Run the whole pipeline: report versions, download, organize, configure, train.

    Any exception raised by a step is caught here and printed; it is not
    propagated to the caller.
    """
    try:
        # Step 1: show the versions of the key libraries.
        setup_environment()

        # Step 2: fetch the raw datasets.
        downloaded_paths = download_datasets()

        # Step 3: merge them into the images/labels layout.
        data_root, _images_root, labels_root = organize_dataset(downloaded_paths)

        # Step 4: write the dataset YAML.
        config_file = create_yaml_config(data_root, labels_root)

        # Step 5: train with the default settings.
        print("\nIniciando entrenamiento...")
        train_model(config_file)

        print("\n¡Proceso completado exitosamente!")
    except Exception as error:
        print(f"\nError en el proceso: {error}")


# Run the pipeline only when executed as a script/notebook cell, not on import.
if __name__ == "__main__":
    main()

Mounted at /content/drive
Versiones instaladas:
- Numpy: 1.23.5
- Torch: 2.6.0+cu124
- Ultralytics: 8.3.97
Descargando datasets...
Datasets descargados en:
- /root/.cache/kagglehub/datasets/kvnpatel/fruits-vegetable-detection-for-yolov4/versions/1
- /root/.cache/kagglehub/datasets/kapturovalexander/fruits-by-yolo-fruits-detection/versions/11
¡Dataset organizado en /content/datasets!
Archivo de configuración creado en /content/datasets/data.yaml

Iniciando entrenamiento...
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt'...


100%|██████████| 6.25M/6.25M [00:00<00:00, 251MB/s]

Ultralytics 8.3.97 🚀 Python-3.11.11 torch-2.6.0+cu124 CPU (Intel Xeon 2.20GHz)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=/content/datasets/data.yaml, epochs=50, time=None, patience=10, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=cpu, workers=8, project=fruit_detection, name=exp_yolov8n, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, 




Downloading https://ultralytics.com/assets/Arial.ttf to '/root/.config/Ultralytics/Arial.ttf'...


100%|██████████| 755k/755k [00:00<00:00, 101MB/s]


Overriding model.yaml nc=80 with nc=14

                   from  n    params  module                                       arguments                     
  0                  -1  1       464  ultralytics.nn.modules.conv.Conv             [3, 16, 3, 2]                 
  1                  -1  1      4672  ultralytics.nn.modules.conv.Conv             [16, 32, 3, 2]                
  2                  -1  1      7360  ultralytics.nn.modules.block.C2f             [32, 32, 1, True]             
  3                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  4                  -1  2     49664  ultralytics.nn.modules.block.C2f             [64, 64, 2, True]             
  5                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  6                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  7                  -1  1    295424  ultralytic

[34m[1mtrain: [0mScanning /content/datasets/labels... 4592 images, 2974 backgrounds, 0 corrupt: 100%|██████████| 7566/7566 [00:09<00:00, 802.75it/s] 


[34m[1mtrain: [0mNew cache created: /content/datasets/labels.cache
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))


[34m[1mval: [0mScanning /content/datasets/labels.cache... 4592 images, 2974 backgrounds, 0 corrupt: 100%|██████████| 7566/7566 [00:00<?, ?it/s]


Plotting labels to fruit_detection/exp_yolov8n/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000556, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mfruit_detection/exp_yolov8n[0m
Starting training for 50 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/50         0G     0.9656      3.529      1.266         19        640:  53%|█████▎    | 251/473 [59:22<51:45, 13.99s/it]