<a href="https://colab.research.google.com/github/sandrarairan/Gradio_MCP_space-Rf_detr/blob/main/Gradio_MCP_space_Rf_detr.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -qU rfdetr supervision gradio

## GRADIO

Reference — Hugging Face blog post on building an MCP server with Gradio: https://huggingface.co/blog/gradio-mcp

In [None]:
import gradio as gr
from PIL import Image
from tqdm import tqdm
from typing import TypeVar, Tuple
import numpy as np
from rfdetr import RFDETRBase, RFDETRLarge
from rfdetr.util.coco_classes import COCO_CLASSES
import supervision as sv
from rfdetr.detr import RFDETR

import datetime
import os
import shutil
import uuid


def create_directory(directory_path: str) -> None:
    """Create ``directory_path``, including missing parents, if it is absent.

    Calling this on a directory that already exists is a no-op.

    Args:
        directory_path: Path of the directory to create.

    Raises:
        FileExistsError: If the path exists but is not a directory.
    """
    # exist_ok=True removes the window between a separate existence check
    # and the creation call, during which another process could create it.
    os.makedirs(directory_path, exist_ok=True)


def delete_directory(directory_path: str) -> None:
    """Recursively delete ``directory_path`` and everything inside it.

    Args:
        directory_path: Path of the directory to remove.

    Raises:
        FileNotFoundError: If ``directory_path`` does not exist.
        PermissionError: If the tree cannot be removed because of missing
            permissions; the original error is attached as the cause.
    """
    if not os.path.exists(directory_path):
        raise FileNotFoundError(f"Directory '{directory_path}' does not exist.")

    try:
        shutil.rmtree(directory_path)
    except PermissionError as err:
        # Chain explicitly so the failing path reported by the OS stays
        # visible in the traceback.
        raise PermissionError(
            f"Permission denied: Unable to delete '{directory_path}'."
        ) from err


def generate_unique_name() -> str:
    """Return a name built from the current timestamp and a random UUID.

    The result has the form ``YYYYmmddHHMMSS_<uuid4>``, where the timestamp
    comes from ``datetime.datetime.now()`` at the moment of the call.
    """
    timestamp = f"{datetime.datetime.now():%Y%m%d%H%M%S}"
    return "_".join((timestamp, str(uuid.uuid4())))

# Upper bound, in seconds, on how much of an input video gets processed.
MAX_VIDEO_LENGTH_SECONDS = 5
# Factor applied to the width and height of every output video frame.
VIDEO_SCALE_FACTOR = 0.5
# Directory where processed videos are written.
VIDEO_TARGET_DIRECTORY = "tmp"

# Make sure the output directory exists as soon as this code is loaded.
create_directory(directory_path=VIDEO_TARGET_DIRECTORY)

def video_processing_inference(
        input_video: str,
        confidence: float,
        resolution: int,
        checkpoint: str,
        progress=gr.Progress(track_tqdm=True)
):
    """Annotate the start of a video with detections and save the result.

    At most ``MAX_VIDEO_LENGTH_SECONDS`` worth of frames are read. Each frame
    is annotated at its original size and then scaled by
    ``VIDEO_SCALE_FACTOR`` before being written.

    Args:
        input_video: Path of the video to process.
        confidence: Detection threshold forwarded to ``detect_and_annotate``.
        resolution: Model input resolution forwarded to ``load_model``.
        checkpoint: Checkpoint name forwarded to ``load_model``.
        progress: Not referenced in the body; presumably lets Gradio mirror
            the tqdm loop in the UI -- confirm against the Gradio docs.

    Returns:
        Path of the written ``.mp4`` file inside ``VIDEO_TARGET_DIRECTORY``.
    """
    detector = load_model(resolution=resolution, checkpoint=checkpoint)

    output_video = os.path.join(
        VIDEO_TARGET_DIRECTORY, generate_unique_name() + ".mp4"
    )

    # The sink is configured with the scaled-down frame size.
    info = sv.VideoInfo.from_video_path(input_video)
    info.width = int(info.width * VIDEO_SCALE_FACTOR)
    info.height = int(info.height * VIDEO_SCALE_FACTOR)

    # Never process more frames than the time cap allows.
    frame_limit = min(info.total_frames, info.fps * MAX_VIDEO_LENGTH_SECONDS)
    frames = sv.get_video_frames_generator(input_video, end=frame_limit)

    with sv.VideoSink(output_video, video_info=info) as sink:
        for frame in tqdm(frames, total=frame_limit):
            # NOTE(review): frames are passed to the model as delivered by
            # supervision; confirm the channel order matches what
            # model.predict expects for numpy input.
            annotated = detect_and_annotate(
                model=detector,
                image=frame,
                confidence=confidence,
            )
            sink.write_frame(sv.scale_image(annotated, VIDEO_SCALE_FACTOR))

    return output_video


## Image processing

# Type variable constraining image arguments to Pillow images or numpy arrays.
ImageType = TypeVar("ImageType", Image.Image, np.ndarray)

# Palette of twelve hex colors passed to the box and label annotators.
COLOR = sv.ColorPalette.from_hex([
    "#ffff00", "#ff9b00", "#ff8080", "#ff66b2", "#ff66ff", "#b266ff",
    "#9999ff", "#3399ff", "#66ffff", "#33ff99", "#66ff66", "#99ff00"
])


def calculate_resolution_wh(image: ImageType) -> Tuple[int, int]:
    """Return the ``(width, height)`` of a Pillow image or numpy array.

    Args:
        image: A Pillow image, or a numpy array whose first two axes are
            read as height and width.

    Returns:
        The image size as ``(width, height)``.

    Raises:
        ValueError: If a numpy array has fewer than two dimensions.
        TypeError: If ``image`` is neither a Pillow image nor a numpy array.
    """
    if isinstance(image, Image.Image):
        # Pillow already reports size as (width, height).
        return image.size

    if not isinstance(image, np.ndarray):
        raise TypeError("Input image must be a Pillow Image or a numpy array.")

    if image.ndim < 2:
        raise ValueError("Input numpy array image must have at least 2 dimensions (height, width).")

    height, width = image.shape[:2]
    return width, height


def detect_and_annotate(
        model: RFDETR,
        image: ImageType,
        confidence: float
) -> ImageType:
    """Run detection on ``image`` and draw boxes and labels on a copy of it.

    Args:
        model: Detector whose ``predict`` method is called on the image.
        image: Pillow image or numpy array to analyse; it is not modified.
        confidence: Threshold passed to ``model.predict``.

    Returns:
        A copy of ``image`` annotated with one box and one
        ``"<class name> <score>"`` label per detection.
    """
    # Floors that keep the drawing parameters positive for small images.
    min_text_scale = 0.1
    min_thickness = 1

    detections = model.predict(image, threshold=confidence)

    resolution_wh = calculate_resolution_wh(image)
    # The fixed offsets shrink supervision's suggested sizes, but on small
    # images they can push the values to zero or below, so clamp them.
    text_scale = max(
        sv.calculate_optimal_text_scale(resolution_wh=resolution_wh) - 0.4,
        min_text_scale,
    )
    thickness = max(
        sv.calculate_optimal_line_thickness(resolution_wh=resolution_wh) - 2,
        min_thickness,
    )

    bbox_annotator = sv.BoxAnnotator(color=COLOR, thickness=thickness)
    label_annotator = sv.LabelAnnotator(
        color=COLOR,
        text_color=sv.Color.BLACK,
        text_scale=text_scale,
        text_padding=1
    )

    labels = [
        f"{COCO_CLASSES[class_id]} {conf:.2f}"
        for class_id, conf in zip(detections.class_id, detections.confidence)
    ]

    # Annotate a copy so the caller's image stays untouched.
    annotated_image = image.copy()
    annotated_image = bbox_annotator.annotate(annotated_image, detections)
    annotated_image = label_annotator.annotate(annotated_image, detections, labels)
    return annotated_image


def load_model(resolution: int, checkpoint: str) -> RFDETR:
    """Instantiate the RF-DETR model selected by ``checkpoint``.

    Args:
        resolution: Value passed as ``resolution`` to the model constructor.
        checkpoint: ``"base"`` for ``RFDETRBase`` or ``"large"`` for
            ``RFDETRLarge``.

    Returns:
        A newly constructed model instance.

    Raises:
        TypeError: If ``checkpoint`` is neither ``"base"`` nor ``"large"``.
    """
    if checkpoint not in ("base", "large"):
        raise TypeError("Checkpoint must be 'base' or 'large'")

    model_cls = RFDETRBase if checkpoint == "base" else RFDETRLarge
    return model_cls(resolution=resolution)


def image_processing_inference(
        input_image: Image.Image,
        confidence: float,
        resolution: int,
        checkpoint: str
):
    """Detect objects in a single image and return the annotated result.

    The image is resized to a ``resolution`` x ``resolution`` square before
    inference, so the aspect ratio is not preserved and the returned image
    has that square size.

    Args:
        input_image: Pillow image to analyse.
        confidence: Detection threshold forwarded to ``detect_and_annotate``.
        resolution: Side length of the resized image and model resolution.
        checkpoint: Checkpoint name forwarded to ``load_model``.

    Returns:
        The resized image annotated with detection boxes and labels.

    Raises:
        ValueError: If ``input_image`` is ``None``.
    """
    # A request submitted without an image would otherwise fail with an
    # opaque AttributeError on ``None.resize``.
    if input_image is None:
        raise ValueError("An input image is required.")

    # Ensure the image matches the requested model resolution.
    input_image = input_image.resize((resolution, resolution))
    model = load_model(resolution=resolution, checkpoint=checkpoint)
    return detect_and_annotate(model=model, image=input_image, confidence=confidence)






In [None]:
# Gradio interface
def gradio_interface(image, confidence, resolution, checkpoint):
    """Run object detection on an image and return the annotated image.

    Thin adapter used as the Gradio callback: every argument is forwarded
    unchanged to ``image_processing_inference``.

    Args:
        image: Image to analyse.
        confidence: Detection confidence threshold.
        resolution: Model input resolution.
        checkpoint: Model checkpoint name, "base" or "large".

    Returns:
        Whatever ``image_processing_inference`` returns for these inputs.
    """
    return image_processing_inference(
        input_image=image,
        confidence=confidence,
        resolution=resolution,
        checkpoint=checkpoint
    )


# Gradio UI: one image input plus the three inference settings, wired to
# ``gradio_interface`` and showing the annotated image as output.
demo = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Image(type="pil", label="Sube una imagen"),
        gr.Slider(0.1, 1.0, value=0.5, step=0.05, label="Umbral de confianza"),
        # RF-DETR base/large document that the resolution must be divisible
        # by 56, so the slider only offers multiples of 56 (336 .. 1400).
        gr.Slider(336, 1400, step=56, value=728, label="Resolución de entrada"),
        gr.Radio(choices=["base", "large"], value="base", label="Modelo (checkpoint)")
    ],
    outputs=gr.Image(type="pil", label="Resultado con detecciones"),
    title="Demo RF-DETR con Gradio",
    description="Sube una imagen y selecciona el modelo, resolución y umbral de confianza para realizar detección de objetos."
)

# Launch the app only when this code is executed directly. The
# ``mcp_server=True`` flag is passed so Gradio also serves the app over MCP
# (see the Gradio MCP guide for details).
if __name__ == "__main__":
    demo.launch(mcp_server=True)