## Paquetes Basicos

In [1]:
import json
import os
import shutil
import numpy as np
from sklearn.model_selection import train_test_split

## Separación de imágenes entre entrenamiento y prueba

In [2]:
# Transforma de formato Coco a formato Yolo
def coco_to_yolo(x, y, w, h, width, height):
    return [((2*x + w)/(2*width)), ((2*y + h)/(2*height)), w/width, h/height]

In [5]:
def split_data(data, test_size=0.2, random_state=42):
    images = data['images']
    ids = [img['id'] for img in images]
    
    train_ids, rest_ids = train_test_split(ids, test_size=test_size, random_state=random_state)

    test_ids, val_ids = train_test_split(rest_ids, test_size=0.5, random_state=random_state)
    
    train_data = {'images': [img for img in images if img['id'] in train_ids],
                  'annotations': [ann for ann in data['annotations'] if ann['image_id'] in train_ids]}
    
    test_data = {'images': [img for img in images if img['id'] in test_ids],
                'annotations': [ann for ann in data['annotations'] if ann['image_id'] in test_ids]}

    val_data = {'images': [img for img in images if img['id'] in val_ids],
                'annotations': [ann for ann in data['annotations'] if ann['image_id'] in val_ids]}
    
    return train_data, test_data, val_data

def preprocessing(partition: str, input_path: str, output_path: str, data: object):
    os.makedirs(f"{output_path}/images/{partition}", exist_ok=True)
    os.makedirs(f"{output_path}/labels/{partition}", exist_ok=True)

    count = 1
    for fileinfo in data['images']:
        origin = f"{input_path}/plaga_processed/{fileinfo['file_name']}"
        img_id = fileinfo['id']
        width = fileinfo['width']
        height = fileinfo['height']

        img_ann = get_img_ann(img_id)

        if os.path.exists(origin):
            items = []
            for ann in img_ann:
                label = ann['category_id'] - 1
                box = ann['bbox']
                xc, yc, w, h = coco_to_yolo(box[0], box[1], box[2], box[3], width, height)
                items.append(f"{label} {xc} {yc} {w} {h}")

            with open(f"{output_path}/labels/{partition}/img{count}.txt", "w") as f:
                for item in items:
                    f.write(f"{item}\n")
        else:
            origin = f"{input_path}/sin_plaga_processed/{fileinfo['file_name']}"

        destination = f"{output_path}/images/{partition}/img{count}.png"
        try:
            shutil.copy(origin, destination)
        except shutil.SameFileError:
            print("Source and destination represents the same file.")

        count += 1

def get_img_ann(image_id):
    img_ann = []
    isFound = False
    for ann in data['annotations']:
        if ann['image_id'] == image_id:
            img_ann.append(ann)
            isFound = True
    if isFound:
        return img_ann
    else:
        return None

input_path = r"../data/raw"
output_path = r"../data/raw/dataYolo" 

# Cargar datos desde el archivo JSON
f = open(f"{input_path}/aguacate_data.json")
data = json.load(f)
f.close()

# Dividir datos en conjuntos de entrenamiento, prueba y validación
train_data, test_data, val_data = split_data(data, test_size=0.2, random_state=42)

# Procesar conjuntos de entrenamiento, prueba y validación
preprocessing("train", input_path, output_path, train_data)
preprocessing("test", input_path, output_path, test_data)
preprocessing("validation", input_path, output_path, val_data)

../data/raw/plaga_processed/Heilipus (1).png
../data/raw/plaga_processed/Heilipus (10).png
../data/raw/plaga_processed/Heilipus (101).png
../data/raw/plaga_processed/Heilipus (102).png
../data/raw/plaga_processed/Heilipus (103).png
../data/raw/plaga_processed/Heilipus (104).png
../data/raw/plaga_processed/Heilipus (105).png
../data/raw/plaga_processed/Heilipus (106).png
../data/raw/plaga_processed/Heilipus (107).png
../data/raw/plaga_processed/Heilipus (109).png
../data/raw/plaga_processed/Heilipus (11).png
../data/raw/plaga_processed/Heilipus (110).png
../data/raw/plaga_processed/Heilipus (111).png
../data/raw/plaga_processed/Heilipus (112).png
../data/raw/plaga_processed/Heilipus (113).png
../data/raw/plaga_processed/Heilipus (114).png
../data/raw/plaga_processed/Heilipus (115).png
../data/raw/plaga_processed/Heilipus (116).png
../data/raw/plaga_processed/Heilipus (117).png
../data/raw/plaga_processed/Heilipus (118).png
../data/raw/plaga_processed/Heilipus (119).png
../data/raw/plaga

## Entrenamiento y Evaluación del Modelo YOLO

In [9]:
!pip install ultralytics
from ultralytics import YOLO



In [12]:
model = YOLO("yolov8s.pt")
model.train(data="train.yaml", epochs=10)

New https://pypi.org/project/ultralytics/8.2.67 available 😃 Update with 'pip install -U ultralytics'
Ultralytics YOLOv8.0.227 🚀 Python-3.11.5 torch-2.1.2+cpu CPU (Intel Core(TM) i5-9300H 2.40GHz)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=yolov8s.pt, data=train.yaml, epochs=10, patience=50, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train5, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, show=False, save_frames=False, save_txt=False, save_conf

[34m[1mtrain: [0mScanning D:\proyectoGrado\mlops_project\data\raw\dataYolo\labels\train... 555 images, 40 backgrounds, 0 corrupt: 100%|██████████| 595/595 [00:02<00:00, 262.89it/s]

[34m[1mtrain: [0mNew cache created: D:\proyectoGrado\mlops_project\data\raw\dataYolo\labels\train.cache



[34m[1mval: [0mScanning D:\proyectoGrado\mlops_project\data\raw\dataYolo\labels\validation... 68 images, 7 backgrounds, 0 corrupt: 100%|██████████| 75/75 [00:00<00:00, 227.17it/s]

[34m[1mval: [0mNew cache created: D:\proyectoGrado\mlops_project\data\raw\dataYolo\labels\validation.cache





Plotting labels to runs\detect\train5\labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)


2024/07/28 09:06:27 INFO mlflow.tracking.fluent: Experiment with name '/Shared/YOLOv8' does not exist. Creating a new experiment.
2024/07/28 09:06:28 INFO mlflow.tracking.fluent: Autologging successfully enabled for statsmodels.
2024/07/28 09:06:30 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


[34m[1mMLflow: [0mlogging run_id(86102a72cfce480d8fb68c649520f457) to runs\mlflow
[34m[1mMLflow: [0mview at http://127.0.0.1:5000 with 'mlflow server --backend-store-uri runs\mlflow'
[34m[1mMLflow: [0mdisable with 'yolo settings mlflow=False'
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mruns\detect\train5[0m
Starting training for 10 epochs...
Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/10         0G      1.565      3.395      1.181          3        640: 100%|██████████| 38/38 [15:51<00:00, 25.03s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:39<00:00, 13.06s/it]

                   all         75         68       0.83       0.79      0.855      0.541






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/10         0G      1.428      1.242      1.133          3        640: 100%|██████████| 38/38 [13:51<00:00, 21.88s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:44<00:00, 14.82s/it]

                   all         75         68      0.809      0.561      0.624      0.366






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/10         0G      1.451      1.066      1.172          3        640: 100%|██████████| 38/38 [11:47<00:00, 18.63s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:34<00:00, 11.66s/it]

                   all         75         68       0.93      0.956      0.973      0.595






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/10         0G       1.38     0.9335      1.139          2        640: 100%|██████████| 38/38 [14:35<00:00, 23.04s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:43<00:00, 14.56s/it]

                   all         75         68      0.973      0.868      0.966      0.616






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/10         0G      1.384     0.9144      1.189          3        640: 100%|██████████| 38/38 [13:13<00:00, 20.89s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:38<00:00, 12.67s/it]

                   all         75         68      0.906      0.941      0.973      0.613






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/10         0G       1.37     0.8516      1.165          2        640: 100%|██████████| 38/38 [13:01<00:00, 20.58s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:39<00:00, 13.31s/it]

                   all         75         68      0.942      0.956      0.971      0.651






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/10         0G       1.25     0.7107      1.081          3        640: 100%|██████████| 38/38 [12:54<00:00, 20.37s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:37<00:00, 12.37s/it]

                   all         75         68      0.944      0.985      0.984      0.687






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/10         0G      1.225      0.702      1.079          3        640: 100%|██████████| 38/38 [12:53<00:00, 20.35s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:39<00:00, 13.25s/it]

                   all         75         68       0.97      0.971      0.988      0.688






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/10         0G      1.227     0.6681      1.077          2        640: 100%|██████████| 38/38 [12:37<00:00, 19.93s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:39<00:00, 13.12s/it]

                   all         75         68      0.978      0.985      0.991      0.707






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/10         0G      1.191     0.6171      1.068          2        640: 100%|██████████| 38/38 [12:48<00:00, 20.22s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:40<00:00, 13.34s/it]

                   all         75         68      0.981      0.985      0.989      0.729






10 epochs completed in 2.339 hours.
Optimizer stripped from runs\detect\train5\weights\last.pt, 22.5MB
Optimizer stripped from runs\detect\train5\weights\best.pt, 22.5MB

Validating runs\detect\train5\weights\best.pt...
Ultralytics YOLOv8.0.227 🚀 Python-3.11.5 torch-2.1.2+cpu CPU (Intel Core(TM) i5-9300H 2.40GHz)
Model summary (fused): 168 layers, 11125971 parameters, 0 gradients, 28.4 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:34<00:00, 11.66s/it]


                   all         75         68      0.981      0.985      0.989      0.729
Speed: 3.3ms preprocess, 300.9ms inference, 0.0ms loss, 0.4ms postprocess per image
Results saved to [1mruns\detect\train5[0m
[34m[1mMLflow: [0mresults logged to runs\mlflow
[34m[1mMLflow: [0mdisable with 'yolo settings mlflow=False'


ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x000002878011EA90>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045,    0.046046,    0.047047,
          0.0480

## Evaluación de desempeño (conjunto de validación)

In [33]:
metrics = model.val(data="train.yaml", split="val")
matriz_confusion = metrics.confusion_matrix.matrix

# Extraer TP, FP, FN, TN
TP = matriz_confusion[0, 0]
FP = matriz_confusion[0, 1]
FN = matriz_confusion[1, 0]
TN = matriz_confusion[1, 1]

# Calcular el Accuracy, Precision y Recall
accuracy = (TP + TN) / (TP + FP + FN + TN)
precision = TP / (TP + FP)
recall = TP / (TP + FN)

print(f'Accuracy al final de la ultima época: {accuracy*100}')
print(f'Precision al final de la ultima época: {precision*100}')
print(f'Recall al final de la ultima época: {recall*100}')

Ultralytics YOLOv8.0.227 🚀 Python-3.11.5 torch-2.1.2+cpu CPU (Intel Core(TM) i5-9300H 2.40GHz)


[34m[1mval: [0mScanning D:\proyectoGrado\mlops_project\data\raw\dataYolo\labels\validation.cache... 68 images, 7 backgrounds, 0 corrupt: 100%|██████████| 75/75 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 5/5 [00:38<00:00,  7.69s/it]


                   all         75         68      0.981      0.985      0.991       0.73
Speed: 2.6ms preprocess, 329.1ms inference, 0.0ms loss, 0.6ms postprocess per image
Results saved to [1mruns\detect\train54[0m
Accuracy al final de la ultima época: 95.71428571428572
Precision al final de la ultima época: 97.10144927536231
Recall al final de la ultima época: 98.52941176470588


## Evaluación de desempeño (conjunto de prueba)

In [34]:
metrics = model.val(data="train.yaml", split="test")
matriz_confusion = metrics.confusion_matrix.matrix

# Extraer TP, FP, FN, TN
TP = matriz_confusion[0, 0]
FP = matriz_confusion[0, 1]
FN = matriz_confusion[1, 0]
TN = matriz_confusion[1, 1]

# Calcular el Accuracy, Precision y Recall
accuracy = (TP + TN) / (TP + FP + FN + TN)
precision = TP / (TP + FP)
recall = TP / (TP + FN)

print(f'Accuracy al final de la ultima época: {accuracy*100}')
print(f'Precision al final de la ultima época: {precision*100}')
print(f'Recall al final de la ultima época: {recall*100}')

Ultralytics YOLOv8.0.227 🚀 Python-3.11.5 torch-2.1.2+cpu CPU (Intel Core(TM) i5-9300H 2.40GHz)


[34m[1mval: [0mScanning D:\proyectoGrado\mlops_project\data\raw\dataYolo\labels\test.cache... 71 images, 3 backgrounds, 0 corrupt: 100%|██████████| 74/74 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 5/5 [00:35<00:00,  7.19s/it]


                   all         74         71      0.933      0.976      0.985      0.661
Speed: 2.5ms preprocess, 306.5ms inference, 0.0ms loss, 0.4ms postprocess per image
Results saved to [1mruns\detect\train55[0m
Accuracy al final de la ultima época: 93.24324324324324
Precision al final de la ultima época: 95.83333333333334
Recall al final de la ultima época: 97.1830985915493
