<a href="https://colab.research.google.com/github/riddhikaa/detection/blob/main/text_det_mdoel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install Gradio; the detector cell below imports it as `gr` to build the web UI.
!pip install gradio


Collecting gradio
  Downloading gradio-5.10.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.6-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.5.3 (from gradio)
  Downloading gradio_client-1.5.3-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.2.2 (from gradio)
  Downloading ruff-0.8.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.meta

In [None]:
# Install Ultralytics; the detector cell below imports its `YOLO` class to load the model.
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.58-py3-none-any.whl.metadata (35 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.13-py3-none-any.whl.metadata (9.4 kB)
Downloading ultralytics-8.3.58-py3-none-any.whl (905 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m905.3/905.3 kB[0m [31m15.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.13-py3-none-any.whl (26 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.58 ultralytics-thop-2.0.13


In [None]:
# Install the remaining packages. Of these, only `torch` is imported by the code
# visible in this notebook; torchvision, onnx, onnxruntime and albumentations are
# not referenced directly by the cells shown here.
!pip install torch torchvision onnx onnxruntime albumentations

Collecting onnxruntime
  Downloading onnxruntime-1.20.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Collecting coloredlogs (from onnxruntime)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)
Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime)
  Downloading humanfriendly-10.0-py2.py3-none-any.whl.metadata (9.2 kB)
Downloading onnxruntime-1.20.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (13.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.3/13.3 MB[0m [31m58.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.0/46.0 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading humanfriendly-10.0-py2.py3-none-any.whl (86 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.8/86.8 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected pack

In [None]:
import cv2
import torch
from ultralytics import YOLO
import gradio as gr
import numpy as np

class FastDetector:
    """Run a YOLO model on an image file and return the image with labelled boxes.

    The detector reads the file with OpenCV, downsizes it so that its longest
    side is at most ``max_size``, runs inference, scales the boxes back to the
    original resolution and draws them on an RGB copy of the original image.
    Rendered results are kept in a small FIFO cache keyed by file path.

    Args:
        model_path: Path to the weights file handed to ``ultralytics.YOLO``.
        max_size: Longest image side (in pixels) passed to the model.
        class_colors: Optional ordered mapping ``{class_name: (r, g, b)}`` whose
            key order follows the model's class ids. When omitted, a
            module-level ``ENTITIES_COLORS`` mapping is used if the notebook
            defines one; otherwise a palette is derived from the model's class
            names.
        conf: Confidence threshold passed to ``predict``.
        iou: IoU threshold passed to ``predict``.
        max_det: Maximum number of detections passed to ``predict``.
        cache_size: Maximum number of rendered images kept in the cache
            (``0`` disables caching).
    """

    # Colors cycled by class id when no explicit color mapping is available.
    _FALLBACK_PALETTE = (
        (255, 0, 0), (0, 160, 0), (0, 0, 255), (255, 140, 0),
        (160, 0, 200), (0, 170, 170), (200, 0, 100), (110, 110, 110),
    )

    def __init__(self, model_path, max_size=800, class_colors=None,
                 conf=0.6, iou=0.4, max_det=30, cache_size=15):
        self.max_size = max_size
        self.class_colors = class_colors
        self.conf = conf
        self.iou = iou
        self.max_det = max_det
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # Half precision is only used on GPU.
        self.use_half = self.device.type == 'cuda'

        # Optimize CUDA settings
        if self.use_half:
            torch.cuda.empty_cache()
            torch.backends.cudnn.benchmark = True
            torch.backends.cudnn.deterministic = False
            torch.backends.cuda.matmul.allow_tf32 = True
            torch.backends.cudnn.allow_tf32 = True

        # Load and optimize model
        self.model = YOLO(model_path)
        self.model.to(self.device)

        if self.use_half:
            # Convert model to half precision (FP16)
            self.model = self.model.half()
            self.model.fuse()

            # Warm up through the same predict() path and arguments used for
            # real requests, so the first user call does not pay the setup
            # cost. A uint8 image (rather than a raw tensor) also works when
            # max_size is not a multiple of the model stride.
            warmup_image = np.zeros((max_size, max_size, 3), dtype=np.uint8)
            self._predict(warmup_image)
            torch.cuda.synchronize()

        # Initialize cache with fixed size
        self.cache_size = cache_size
        self.image_cache = {}

    def _predict(self, image):
        """Run the model on one BGR image and return the Ultralytics results list."""
        # torch.autocast(enabled=False) is a no-op, so no separate null context
        # manager is needed for the CPU case.
        with torch.no_grad(), torch.autocast(device_type=self.device.type,
                                             enabled=self.use_half):
            return self.model.predict(
                source=image,
                conf=self.conf,
                iou=self.iou,
                max_det=self.max_det,
                device=self.device,
                half=self.use_half,
            )

    def _resolve_class_colors(self):
        """Return the ordered ``{class_name: color}`` mapping used for drawing."""
        if self.class_colors is not None:
            return self.class_colors

        # Honor a notebook-level mapping when one exists, without raising
        # NameError when it does not.
        shared = globals().get("ENTITIES_COLORS")
        if shared:
            return shared

        # Fall back to the class names stored in the model, ordered by class id.
        names = self.model.names
        if isinstance(names, dict):
            ordered = [names[class_id] for class_id in sorted(names)]
        else:
            ordered = list(names)
        palette = self._FALLBACK_PALETTE
        self.class_colors = {
            name: palette[index % len(palette)]
            for index, name in enumerate(ordered)
        }
        return self.class_colors

    def preprocess_image(self, image):
        """Prepare an OpenCV image for inference.

        Grayscale and BGRA inputs are converted to 3-channel BGR, the same
        channel order a plain 3-channel ``cv2.imread`` result already has, and
        the image is shrunk (never enlarged) so both sides fit in ``max_size``.

        Args:
            image: ``numpy`` array as produced by ``cv2.imread``.

        Returns:
            ``(image, scale)`` where ``scale`` is ``min(max_size / h, max_size / w)``
            of the input; the image is resized only when ``scale < 1``.
        """
        if image.ndim == 2:
            image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
        elif image.shape[2] == 4:
            image = cv2.cvtColor(image, cv2.COLOR_BGRA2BGR)

        # Resize if needed
        h, w = image.shape[:2]
        scale = min(self.max_size / h, self.max_size / w)

        if scale < 1:
            # Clamp to 1 px so extreme aspect ratios never yield an empty size.
            new_h = max(1, int(h * scale))
            new_w = max(1, int(w * scale))
            image = cv2.resize(image, (new_w, new_h),
                               interpolation=cv2.INTER_AREA)

        return image, scale

    def detect(self, image_path):
        """Detect entities in the image at ``image_path`` and draw them.

        Args:
            image_path: Path of the image file; ``None`` or ``""`` (for example
                when the upload widget is cleared) is accepted.

        Returns:
            An RGB ``uint8`` array with boxes and ``"<class> <confidence>"``
            labels drawn on it, or ``None`` when no path was given or the file
            could not be read.
        """
        if not image_path:
            return None

        # Quick cache check
        cached = self.image_cache.get(image_path)
        if cached is not None:
            return cached

        # IMREAD_COLOR always yields an 8-bit, 3-channel BGR image, so drawing
        # and inference never see alpha, single-channel or 16-bit data.
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            return None

        model_input, scale = self.preprocess_image(image)
        # The returned image is displayed as RGB, so draw on an RGB copy.
        canvas = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        results = self._predict(model_input)

        boxes = results[0].boxes
        if len(boxes) > 0:
            coords = boxes.xyxy.cpu().numpy()
            if scale < 1:
                # Map boxes from the resized image back to the original size
                # (out of place, so the result object is left untouched).
                coords = coords / scale

            class_ids = boxes.cls.cpu().numpy()
            confidences = boxes.conf.cpu().numpy()

            colors = self._resolve_class_colors()
            labels = list(colors)  # position in the mapping == class id

            for coord, cls_id, conf in zip(coords, class_ids, confidences):
                x1, y1, x2, y2 = map(int, coord)
                label = labels[int(cls_id)]
                color = colors[label]

                cv2.rectangle(canvas, (x1, y1), (x2, y2), color, 2)

                # Keep the caption inside the image for boxes touching the top.
                text = f"{label} {conf:.2f}"
                cv2.putText(canvas, text, (x1, max(y1 - 5, 12)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

        # Update cache with size limit (oldest entry is evicted first)
        if self.cache_size > 0:
            if len(self.image_cache) >= self.cache_size:
                self.image_cache.pop(next(iter(self.image_cache)))
            self.image_cache[image_path] = canvas

        return canvas

class nullcontext:
    """Context manager that does nothing on entry or exit."""

    def __enter__(self):
        """Enter the block; nothing is acquired and ``as`` binds ``None``."""
        return None

    def __exit__(self, *exc_info):
        """Leave the block; returns ``None``, so exceptions are not suppressed."""
        return None

# Weights file and longest image side (in pixels) used by the detector.
MODEL_WEIGHTS = 'dla-model (1).pt'
MAX_IMAGE_SIDE = 800

# Build the detector once so the model is loaded a single time for all requests.
detector = FastDetector(MODEL_WEIGHTS, max_size=MAX_IMAGE_SIDE)

# Gradio UI: the upload widget hands `detector.detect` a file path
# (type="filepath") and the image it returns is shown as the output.
upload_input = gr.Image(label="Upload scanned document", type="filepath")
iface = gr.Interface(fn=detector.detect, inputs=upload_input, outputs="image")

if __name__ == "__main__":
    iface.launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://356ac099a66fe92699.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
