<h1 style="color: orange; font-size: 48px;">SAM</h1>
<hr>


 <font color='orange'>1. Instalación del modelo</font>

 * Se importa el modelo base >> from segment_anything import sam_model_registry, SamPredictor

 * Este modelo NO se entrena, ni está diseñado para esto

 * Se usa el modelo vit_b


In [None]:
from segment_anything import sam_model_registry, SamPredictor

 <font color='orange'>2. Inferencia sobre el modelo SAM</font>

 * SAM viene preentrenado con millones de imágenes y es capaz, por sí solo, de segmentar elementos en una imagen.

 * Se muestra la aplicación de SAM pasándole como prompt (entrada) las cajas de YOLO


In [None]:
import os
import cv2
import numpy as np
from ultralytics import YOLO
import matplotlib.pyplot as plt
from segment_anything import sam_model_registry, SamPredictor

# ====================== CONFIGURATION ======================
# Model selection and matching threshold.
model_type = "vit_b"  # key used to pick the SAM builder from sam_model_registry
iou_thresh = 0.15     # minimum IoU for a predicted box to count as matching a GT box

# Filesystem locations (machine-specific absolute paths).
sam_checkpoint = r"C:\Users\adina\OneDrive\Documentos\TFM\Preprocesado\Finetuning\sam\sam_vit_b_01ec64.pth"
yolo_model_path = r"C:\Users\adina\OneDrive\Documentos\TFM\new\yolo_640\runs\train\yolov8_mold_finetune\weights\best.pt"
input_dir = r"C:\Users\adina\OneDrive\Documentos\TFM\new\dataset_etiquetado_split\yolo\data\test"
# ===========================================================

# Función para calcular IoU
def calcular_iou(boxA, boxB):
    """Return the intersection-over-union of two axis-aligned boxes.

    Args:
        boxA: Indexable box given as ``[x1, y1, x2, y2]``.
        boxB: Indexable box in the same ``[x1, y1, x2, y2]`` layout.

    Returns:
        The intersection area divided by the union area as a float, or
        ``0.0`` when the boxes do not overlap (touching edges included).
    """
    # Corners of the overlap rectangle.
    left, top = max(boxA[0], boxB[0]), max(boxA[1], boxB[1])
    right, bottom = min(boxA[2], boxB[2]), min(boxA[3], boxB[3])

    # Clamp each side at zero so disjoint boxes give an empty overlap.
    overlap = max(0, right - left) * max(0, bottom - top)
    if overlap == 0:
        return 0.0

    area_a = (boxA[2] - boxA[0]) * (boxA[3] - boxA[1])
    area_b = (boxB[2] - boxB[0]) * (boxB[3] - boxB[1])
    union = area_a + area_b - overlap
    return overlap / float(union)

# Load the YOLO model and the SAM predictor once, before the image loop
print("Cargando modelos YOLO y SAM...")
yolo_model = YOLO(yolo_model_path)
sam = sam_model_registry[model_type](checkpoint=sam_checkpoint)
predictor = SamPredictor(sam)

# Dataset-wide counters, accumulated over every processed image
tp_total, fp_total, fn_total = 0, 0, 0

def _load_gt_boxes(lbl_path, w, h):
    """Read a YOLO-style label file and return pixel boxes ``[x1, y1, x2, y2]``.

    Every non-blank line is parsed as ``class xc yc bw bh``; the centre/size
    values are scaled by the image width ``w`` and height ``h`` (so they are
    expected to be fractions of the image size). A missing label file yields
    an empty list. A malformed non-blank line still raises ``ValueError``.
    """
    boxes = []
    if not os.path.exists(lbl_path):
        return boxes
    with open(lbl_path) as f:
        for line in f:
            if not line.strip():
                continue  # tolerate blank / trailing lines
            _cls, xc, yc, bw, bh = map(float, line.split())
            x1 = int((xc - bw/2) * w)
            y1 = int((yc - bh/2) * h)
            x2 = int((xc + bw/2) * w)
            y2 = int((yc + bh/2) * h)
            boxes.append([x1, y1, x2, y2])
    return boxes


def _match_detections(pred_boxes, gt_boxes, thresh):
    """Greedily pair predictions with ground-truth boxes, one-to-one.

    Predictions are visited in the order given; each one claims the
    still-unclaimed ground-truth box with the highest IoU, provided that IoU
    is positive and at least ``thresh``. Because a ground-truth box can be
    claimed only once, duplicate detections of the same object count as false
    positives and the number of misses can never go negative.

    Returns:
        ``(is_tp, missed)``: a list of booleans parallel to ``pred_boxes``
        and the number of ground-truth boxes left unclaimed.
    """
    claimed = [False] * len(gt_boxes)
    is_tp = []
    for box in pred_boxes:
        best_idx, best_iou = None, 0.0
        for idx, gt in enumerate(gt_boxes):
            if claimed[idx]:
                continue
            iou = calcular_iou(box, gt)
            if iou > best_iou:
                best_idx, best_iou = idx, iou
        hit = best_idx is not None and best_iou >= thresh
        if hit:
            claimed[best_idx] = True
        is_tp.append(hit)
    return is_tp, claimed.count(False)


# Process every image in the input folder
for img_name in sorted(os.listdir(input_dir)):
    if not img_name.lower().endswith(('.jpg', '.png', '.jpeg')):
        continue
    img_path = os.path.join(input_dir, img_name)
    lbl_path = os.path.splitext(img_path)[0] + ".txt"

    # Read the image; cv2.imread returns None instead of raising on failure
    orig = cv2.imread(img_path)
    if orig is None:
        print(f"No se pudo leer la imagen, se omite: {img_path}")
        continue
    h, w = orig.shape[:2]
    rgb = cv2.cvtColor(orig, cv2.COLOR_BGR2RGB)

    # Ground-truth boxes in pixel coordinates
    gt_boxes = _load_gt_boxes(lbl_path, w, h)

    # YOLO prediction
    results = yolo_model.predict(img_path, imgsz=640, conf=0.20, verbose=False)
    pred_boxes = [[int(x1), int(y1), int(x2), int(y2)] for x1, y1, x2, y2 in results[0].boxes.xyxy.cpu().numpy()]

    # Per-image metrics from a single one-to-one matching pass
    is_tp, fn = _match_detections(pred_boxes, gt_boxes, iou_thresh)
    tp = sum(is_tp)
    fp = len(pred_boxes) - tp

    tp_total += tp
    fp_total += fp
    fn_total += fn

    # Only matched detections are used as SAM prompts
    good_boxes = [box for box, hit in zip(pred_boxes, is_tp) if hit]

    # Build the overlay with detections and masks
    overlay = rgb.copy()
    # Draw YOLO boxes (overlay is RGB: blue = TP, red = FP)
    for box, hit in zip(pred_boxes, is_tp):
        color = (0, 0, 255) if hit else (255, 0, 0)
        cv2.rectangle(overlay, (box[0], box[1]), (box[2], box[3]), color, 2)

    # Blend SAM masks in green; embed the image only when there is a prompt
    if good_boxes:
        predictor.set_image(rgb)
    for box in good_boxes:
        box_arr = np.array([box])
        masks, _, _ = predictor.predict(box=box_arr, multimask_output=False)
        mask = masks[0]
        overlay[mask] = (overlay[mask] * 0.5 + np.array([0, 255, 0]) * 0.5).astype(np.uint8)

    # Show the combined result
    plt.figure(figsize=(10, 8))
    plt.imshow(overlay)
    plt.title(f"{img_name} - YOLO+SAM: azul=TP rojo=FP verde=mask")
    plt.axis('off')
    plt.show()

# Final metrics over the whole dataset; each ratio falls back to 0 when its
# denominator is empty so an empty run does not divide by zero.
all_outcomes = tp_total + fp_total + fn_total
if all_outcomes > 0:
    accuracy_total = tp_total / all_outcomes
else:
    accuracy_total = 0

detected = tp_total + fp_total
if detected > 0:
    precision_total = tp_total / detected
else:
    precision_total = 0

expected = tp_total + fn_total
if expected > 0:
    recall_total = tp_total / expected
else:
    recall_total = 0

pr_sum = precision_total + recall_total
if pr_sum > 0:
    f1_total = 2 * precision_total * recall_total / pr_sum
else:
    f1_total = 0

# Print the global metrics
print(f"TP total: {tp_total}, FP total: {fp_total}, FN total: {fn_total}")
print(f"Accuracy: {accuracy_total:.2f}")
print(f"Precision: {precision_total:.2f}")
print(f"Recall: {recall_total:.2f}")
print(f"F1 Score: {f1_total:.2f}")


Cargando modelos YOLO y SAM...
