# Hello Object Detection - Deployment

A very basic introduction to using object detection models with OpenVINO™.

The [horizontal-text-detection-0001](https://github.com/openvinotoolkit/open_model_zoo/blob/master/models/intel/horizontal-text-detection-0001/README.md) model from [Open Model Zoo](https://github.com/openvinotoolkit/open_model_zoo/) is used. It detects horizontal text in images and returns a blob of data with the shape `[100, 5]`. Each detected text box is stored in the `[x_min, y_min, x_max, y_max, conf]` format, where
`(x_min, y_min)` are the coordinates of the top-left corner of the bounding box, `(x_max, y_max)` are the coordinates of the bottom-right corner, and `conf` is the confidence for the predicted class.

## 6. 배포

- UI/UX 고려
- 프로토타입 형태 배포: OpenCV 프레임, Gradio, Streamlit

### 6-1. Gradio
https://www.gradio.app/guides/quickstart

In [44]:
import openvino as ov
import cv2
import numpy as np
import PIL
from pathlib import Path
import gradio as gr

In [45]:
# Create the OpenVINO runtime object; it is reused below to read and compile the model.
core = ov.Core()
# Device names reported by the runtime on this machine.
options=core.available_devices

# Bare expression so the notebook displays the list.
options

['CPU', 'GPU']

In [46]:
# Read the text-detection model from its XML file and compile it for the CPU device.
model = core.read_model(model="model/horizontal-text-detection-0001.xml")
compiled_model = core.compile_model(model=model, device_name="CPU")

# Handles to the first input and the "boxes" output; the functions below
# read these module-level names when preprocessing and running inference.
input_layer = compiled_model.input(0)
output_layer = compiled_model.output("boxes")

In [47]:
def preprocess(image):
    """Build the model input tensor from an RGB image.

    Args:
        image: Image in RGB channel order; anything ``np.array`` accepts
            (for example a PIL image or a NumPy array).

    Returns:
        A tuple ``(input_image, resized_image)``. ``resized_image`` is the
        BGR image resized to the model's input width and height;
        ``input_image`` is the same data with the channel axis moved first
        and a leading batch axis added.
    """
    # OpenCV works in BGR order, so swap the channels once up front.
    bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

    # The input shape is unpacked as (N, C, H, W); only H and W are needed.
    _, _, height, width = input_layer.shape
    resized_image = cv2.resize(bgr, (width, height))

    # HWC -> CHW, then prepend the batch axis.
    channels_first = resized_image.transpose(2, 0, 1)
    input_image = np.expand_dims(channels_first, 0)

    return input_image, resized_image

In [48]:
def convert_result_to_image(rgb_image, resized_image, boxes, threshold=0.3, conf_labels=True):
    """Draw every box whose confidence exceeds ``threshold`` onto ``rgb_image``.

    Args:
        rgb_image: Image to draw on, at its original resolution.
        resized_image: Image at the resolution the box coordinates refer to;
            only its shape is read, to scale the boxes back up.
        boxes: Iterable of ``[x_min, y_min, x_max, y_max, conf]`` rows.
        threshold: Rows whose ``conf`` is not greater than this are skipped.
        conf_labels: When true, the confidence is written above each box.

    Returns:
        The image returned by the last OpenCV drawing call, or ``rgb_image``
        itself when nothing was drawn. Drawing is done directly on the array
        that was passed in.
    """
    # Colors are given in RGB order, matching the image being annotated.
    box_color = (0, 255, 0)
    label_color = (255, 0, 0)

    # Scale factors from the resized image back to the original image.
    full_h, full_w = rgb_image.shape[:2]
    small_h, small_w = resized_image.shape[:2]
    scale_x = full_w / small_w
    scale_y = full_h / small_h

    for detection in boxes:
        score = detection[-1]
        if not score > threshold:
            continue

        left, top, right, bottom = detection[:-1]
        # Both y coordinates are clamped to at least 10 so that a box at the
        # very top of the image keeps a visible upper edge.
        x_min = int(left * scale_x)
        y_min = int(max(top * scale_y, 10))
        x_max = int(right * scale_x)
        y_max = int(max(bottom * scale_y, 10))

        # Arguments: image, start point, end point, color, thickness.
        rgb_image = cv2.rectangle(rgb_image, (x_min, y_min), (x_max, y_max), box_color, 3)

        if conf_labels:
            # Arguments: image, text, bottom-left corner of the text, font,
            # font scale, color, thickness, line type.
            rgb_image = cv2.putText(
                rgb_image,
                f"{score:.2f}",
                (x_min, y_min - 10),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.8,
                label_color,
                1,
                cv2.LINE_AA,
            )

    return rgb_image

In [49]:
def predict_image(image):
    """Run text detection on one image and return it with the boxes drawn.

    Args:
        image: RGB image as a NumPy array or anything ``np.array`` accepts
            (for example a PIL image), or ``None`` when no image was supplied.

    Returns:
        A copy of the image with the detected boxes drawn on it, or ``None``
        when ``image`` is ``None``.
    """
    # An empty submission has nothing to detect; return early instead of
    # failing inside OpenCV.
    if image is None:
        return None

    # np.array makes a copy, so drawing never modifies the caller's array,
    # and non-array inputs gain the ``.shape`` attribute the drawing step reads.
    frame = np.array(image)
    input_image, resized_image = preprocess(frame)

    # Run inference and keep only the "boxes" output.
    boxes = compiled_model([input_image])[output_layer]
    # Drop rows that are entirely zero.
    boxes = boxes[~np.all(boxes == 0, axis=1)]

    return convert_result_to_image(frame, resized_image, boxes, conf_labels=False)

In [50]:
# Wrap predict_image in a Gradio interface (image in, image out) and start it.
demo = gr.Interface(
    fn=predict_image,
    inputs=gr.Image(),
    outputs="image",
)
demo.launch()

Running on local URL:  http://127.0.0.1:7869

To create a public link, set `share=True` in `launch()`.




### 6-2. OpenCV

In [55]:
import openvino as ov
import cv2
import numpy as np
import PIL
from pathlib import Path

In [56]:
# Read the text-detection model from its XML file and compile it for the CPU device.
# NOTE: `core` is not created in this section; it must already exist from an earlier cell.
model = core.read_model(model="model/horizontal-text-detection-0001.xml")
compiled_model = core.compile_model(model=model, device_name="CPU")

# Handles to the first input and the "boxes" output; the functions below
# read these module-level names when preprocessing and running inference.
input_layer = compiled_model.input(0)
output_layer = compiled_model.output("boxes")

In [57]:
def preprocess(image):
    """Build the model input tensor from an RGB image.

    Args:
        image: Image in RGB channel order; anything ``np.array`` accepts
            (for example a PIL image or a NumPy array).

    Returns:
        A tuple ``(input_image, resized_image)``. ``resized_image`` is the
        BGR image resized to the model's input width and height;
        ``input_image`` is the same data with the channel axis moved first
        and a leading batch axis added.
    """
    # The channels are swapped RGB -> BGR here, so the caller must pass RGB.
    bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

    # The input shape is unpacked as (N, C, H, W); only H and W are needed.
    _, _, height, width = input_layer.shape
    resized_image = cv2.resize(bgr, (width, height))

    # HWC -> CHW, then prepend the batch axis.
    channels_first = resized_image.transpose(2, 0, 1)
    input_image = np.expand_dims(channels_first, 0)

    return input_image, resized_image

In [58]:
def convert_result_to_image(rgb_image, resized_image, boxes, threshold=0.3, conf_labels=True):
    """Draw every box whose confidence exceeds ``threshold`` onto ``rgb_image``.

    Args:
        rgb_image: Image to draw on, at its original resolution.
        resized_image: Image at the resolution the box coordinates refer to;
            only its shape is read, to scale the boxes back up.
        boxes: Iterable of ``[x_min, y_min, x_max, y_max, conf]`` rows.
        threshold: Rows whose ``conf`` is not greater than this are skipped.
        conf_labels: When true, the confidence is written above each box.

    Returns:
        The image returned by the last OpenCV drawing call, or ``rgb_image``
        itself when nothing was drawn. Drawing is done directly on the array
        that was passed in.
    """
    # Color tuples for the box outline and the confidence label.
    box_color = (0, 255, 0)
    label_color = (255, 0, 0)

    # Scale factors from the resized image back to the original image.
    full_h, full_w = rgb_image.shape[:2]
    small_h, small_w = resized_image.shape[:2]
    scale_x = full_w / small_w
    scale_y = full_h / small_h

    for detection in boxes:
        score = detection[-1]
        if not score > threshold:
            continue

        left, top, right, bottom = detection[:-1]
        # Both y coordinates are clamped to at least 10 so that a box at the
        # very top of the image keeps a visible upper edge.
        x_min = int(left * scale_x)
        y_min = int(max(top * scale_y, 10))
        x_max = int(right * scale_x)
        y_max = int(max(bottom * scale_y, 10))

        # Arguments: image, start point, end point, color, thickness.
        rgb_image = cv2.rectangle(rgb_image, (x_min, y_min), (x_max, y_max), box_color, 3)

        if conf_labels:
            # Arguments: image, text, bottom-left corner of the text, font,
            # font scale, color, thickness, line type.
            rgb_image = cv2.putText(
                rgb_image,
                f"{score:.2f}",
                (x_min, y_min - 10),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.8,
                label_color,
                1,
                cv2.LINE_AA,
            )

    return rgb_image

In [59]:
def predict_image(image_path, background_image_path):
    """Detect text in the image file at ``image_path`` and return it annotated.

    Args:
        image_path: Path of the image file to analyse.
        background_image_path: Not used by this version; kept so the
            signature stays unchanged.

    Returns:
        The loaded image (BGR order, as read by OpenCV) with the detected
        boxes drawn on it.

    Raises:
        FileNotFoundError: If the image at ``image_path`` cannot be read.
    """
    # The original body referenced an undefined ``image``; load it from the path.
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")

    # preprocess() converts RGB -> BGR itself, while imread already yields
    # BGR, so hand it an RGB copy to keep the channel order correct.
    input_image, resized_image = preprocess(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    # Run inference and keep only the "boxes" output.
    boxes = compiled_model([input_image])[output_layer]
    # Drop rows that are entirely zero.
    boxes = boxes[~np.all(boxes == 0, axis=1)]

    canvas = convert_result_to_image(image, resized_image, boxes, conf_labels=False)
    return canvas

In [67]:
def predict_image(image_path, background_image_path):
    """Detect text in an image and show the result pasted onto a background.

    The annotated image is resized to a height of 500 pixels (keeping its
    aspect ratio), pasted onto the background so that its bottom-right corner
    sits 300 pixels from the right edge and 170 pixels from the bottom edge,
    and shown in an OpenCV window until a key is pressed.

    Args:
        image_path: Path of the image file to analyse.
        background_image_path: Path of the background image file.

    Returns:
        None.

    Raises:
        FileNotFoundError: If either image file cannot be read.
        ValueError: If the background is too small to hold the resized image
            at the fixed position.
    """
    # cv2.imread signals failure by returning None rather than raising.
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    bg = cv2.imread(background_image_path)
    if bg is None:
        raise FileNotFoundError(f"Could not read background image: {background_image_path}")

    # preprocess() converts RGB -> BGR itself, while imread already yields
    # BGR, so hand it an RGB copy to keep the channel order correct.
    input_image, resized_image = preprocess(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    # Run inference and keep only the "boxes" output.
    boxes = compiled_model([input_image])[output_layer]
    # Drop rows that are entirely zero.
    boxes = boxes[~np.all(boxes == 0, axis=1)]

    canvas = convert_result_to_image(image, resized_image, boxes, conf_labels=False)

    # Resize the annotated image (not the raw input) to the fixed display height.
    image_h, image_w = canvas.shape[:2]
    new_h = 500
    new_w = int((new_h / image_h) * image_w)
    image_resize = cv2.resize(canvas, (new_w, new_h))

    # Paste region: anchored by its bottom-right corner at fixed offsets
    # from the background's right and bottom edges.
    xmax = bg.shape[1] - 300
    ymax = bg.shape[0] - 170
    xmin = xmax - new_w
    ymin = ymax - new_h
    if xmin < 0 or ymin < 0:
        # Negative starts would wrap around in the slice and fail with an
        # opaque broadcasting error, so report the real cause instead.
        raise ValueError(
            f"Background image {bg.shape[1]}x{bg.shape[0]} is too small to hold "
            f"a {new_w}x{new_h} overlay at the fixed position"
        )

    # Overlay the annotated image on the background image.
    bg[ymin:ymax, xmin:xmax] = image_resize

    # Display the final combined image.
    cv2.imshow("Sample Image with Prediction on Background", bg)

    # Wait for a key press and close the window.
    cv2.waitKey(0)
    cv2.destroyAllWindows()

# Example usage: run detection on a sample image and show it on the background.
sample_image_path = "./data/intel_rnb.jpg"  # Replace with your actual image file path
#sample_image_path = "./data/starbucks.jpg"  # Replace with your actual image file path
background_image_path = "./data/background.jpg"  # Replace with your actual background image file path

# Opens an OpenCV window and blocks until a key is pressed.
predict_image(sample_image_path, background_image_path)