In [4]:
# Fetch the pre-trained EAST text-detection weights (~86 MiB download).
# NOTE: the repository itself is named "frozen_east_text_detection.pb", so the
# clone creates a *directory* with that name which contains the model *file*
# of the same name -- hence the doubled path used when loading the model.
!git clone https://github.com/oyyd/frozen_east_text_detection.pb

Cloning into 'frozen_east_text_detection.pb'...
remote: Enumerating objects: 3, done.
remote: Total 3 (delta 0), reused 0 (delta 0), pack-reused 3
Unpacking objects: 100% (3/3), 85.66 MiB | 9.51 MiB/s, done.


In [70]:
import os
import json
import glob


import cv2
import numpy as np

def decode(scores, geometry, min_confidence=0.5, nms_threshold=0.5):
    """Turn the raw EAST output maps into axis-aligned text boxes.

    Args:
        scores: Score map, indexed as ``scores[0][0][y][x]``.
        geometry: Geometry map, indexed as ``geometry[0][c][y][x]``.
            Channels 0-3 are used as four distances (0 + 2 give the box
            height, 1 + 3 give the box width) and channel 4 as the rotation
            angle in radians.
        min_confidence: Cells whose score is below this value are skipped.
            Defaults to 0.5, the value that used to be hard-coded.
        nms_threshold: Overlap threshold forwarded to
            ``non_max_suppression``. Defaults to 0.5, the value that used to
            be hard-coded.

    Returns:
        Whatever ``non_max_suppression`` returns: an integer array of
        ``(start_x, start_y, end_x, end_y)`` rows, or an empty list when no
        cell passes ``min_confidence``.
    """
    detections = []
    confidences = []
    height, width = scores.shape[2:4]
    for y in range(height):
        scores_data = scores[0][0][y]
        x0_data = geometry[0][0][y]
        x1_data = geometry[0][1][y]
        x2_data = geometry[0][2][y]
        x3_data = geometry[0][3][y]
        angles_data = geometry[0][4][y]
        for x in range(width):
            score = scores_data[x]
            if score < min_confidence:
                continue
            # Each output cell is mapped back to input-image coordinates by a
            # fixed factor of 4.
            offset_x = x * 4.0
            offset_y = y * 4.0
            angle = angles_data[x]
            cos_a = np.cos(angle)
            sin_a = np.sin(angle)
            h = x0_data[x] + x2_data[x]
            w = x1_data[x] + x3_data[x]
            # The rotation is only used to locate the bottom-right corner;
            # the emitted box itself is axis-aligned.
            end_x = int(offset_x + (cos_a * x1_data[x]) + (sin_a * x2_data[x]))
            end_y = int(offset_y - (sin_a * x1_data[x]) + (cos_a * x2_data[x]))
            start_x = int(end_x - w)
            start_y = int(end_y - h)
            detections.append((start_x, start_y, end_x, end_y))
            confidences.append(score)
    return non_max_suppression(
        np.array(detections), probs=confidences, overlapThresh=nms_threshold
    )

def non_max_suppression(boxes, probs=None, overlapThresh=0.3):
    """Greedy non-maximum suppression over axis-aligned boxes.

    Boxes are visited from the highest-ranked to the lowest-ranked. Each
    visited box is kept, and every remaining box whose intersection with it
    covers more than ``overlapThresh`` of *that remaining box's own area* is
    discarded.

    Args:
        boxes: Array-like of ``(x1, y1, x2, y2)`` rows. Plain Python
            lists/tuples are accepted as well as NumPy arrays.
        probs: Optional per-box scores used for ranking (higher wins). When
            omitted, boxes are ranked by their ``y2`` coordinate.
        overlapThresh: Overlap ratio above which a lower-ranked box is
            suppressed.

    Returns:
        An integer NumPy array with the kept boxes, ordered from
        highest-ranked to lowest-ranked, or an empty list when ``boxes`` is
        empty (kept for backward compatibility).
    """
    # Accept any array-like; the original required an ndarray for `.dtype`.
    boxes = np.asarray(boxes)
    if len(boxes) == 0:
        return []
    # Work in floats so the overlap division is not an integer division and
    # unsigned coordinates cannot wrap around on subtraction.
    if boxes.dtype.kind in ("i", "u"):
        boxes = boxes.astype("float")

    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    # The +1 treats coordinates as inclusive pixel indices.
    area = (x2 - x1 + 1) * (y2 - y1 + 1)

    # argsort is ascending, so the best candidate is always the last index.
    idxs = np.argsort(probs) if probs is not None else np.argsort(y2)

    pick = []
    while len(idxs) > 0:
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)

        # Compare the picked box against all remaining candidates at once
        # instead of one Python-level iteration per pair.
        rest = idxs[:last]
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)
        overlap = (w * h) / area[rest]

        # Drop the picked index together with every candidate it suppresses.
        suppress = np.concatenate(([last], np.where(overlap > overlapThresh)[0]))
        idxs = np.delete(idxs, suppress)
    return boxes[pick].astype("int")



def resize_image(image, height=640, width=640):
    """Letterbox an image to ``height`` x ``width`` for the EAST text detection model.

    The image is scaled uniformly (aspect ratio preserved) so that it fits
    inside the target size, then padded with black borders up to exactly
    ``height`` x ``width``. The padding is split evenly; when the amount is
    odd, the extra pixel goes to the bottom/right.

    Args:
        image: Input image array; only ``image.shape[:2]`` (rows, columns)
            is inspected here.
        height: Target height in pixels.
        width: Target width in pixels.

    Returns:
        A tuple ``(padded_image, top, bottom, left, right)`` where the last
        four values are the border sizes in pixels that were added on each
        side.
    """
    orig_height, orig_width = image.shape[:2]

    # Use the smaller of the two scale factors so the whole image fits.
    ratio = min(height / float(orig_height), width / float(orig_width))

    # int() truncates, so an extremely elongated image could otherwise end up
    # with a 0-pixel side, which cv2.resize rejects. Clamp to at least 1.
    new_height = max(1, int(orig_height * ratio))
    new_width = max(1, int(orig_width * ratio))

    resized_image = cv2.resize(image, (new_width, new_height))

    # Distribute the leftover space as symmetric borders.
    delta_w = width - new_width
    delta_h = height - new_height
    top, bottom = delta_h // 2, delta_h - (delta_h // 2)
    left, right = delta_w // 2, delta_w - (delta_w // 2)
    padded_image = cv2.copyMakeBorder(
        resized_image, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(0, 0, 0)
    )

    return padded_image, top, bottom, left, right


def get_text(image_path, net):
    """Detect text regions in one image and return them in original-image pixels.

    The image is letterboxed to 640x640 with ``resize_image``, run through
    ``net``, decoded with ``decode``, and the resulting boxes are mapped back
    from the letterboxed frame to the coordinates of the file on disk.

    Args:
        image_path: Path of the image file to read with ``cv2.imread``.
        net: A loaded network exposing ``setInput`` and ``forward`` (created
            with ``cv2.dnn.readNet`` in this notebook) that provides the
            ``feature_fusion/Conv_7/Sigmoid`` and ``feature_fusion/concat_3``
            output layers.

    Returns:
        A list of dicts ``{"class": "text", "bbox": [x1, y1, x2, y2]}`` with
        float coordinates in the original image.

    Raises:
        ValueError: If the image cannot be read. ``cv2.imread`` signals this
            by returning ``None``, which previously surfaced as an opaque
            ``AttributeError`` on ``image.shape``.
    """
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError("Could not read image: {}".format(image_path))

    orig_height, orig_width = image.shape[:2]
    new_height = 640
    new_width = 640
    image, top, bottom, left, right = resize_image(image, height=new_height, width=new_width)

    # Scale factors from the un-padded part of the letterboxed image back to
    # the original resolution.
    w_ratio = orig_width / (new_width - left - right)
    h_ratio = orig_height / (new_height - top - bottom)

    # Build the network input from the letterboxed image at its own size.
    blob_height, blob_width = image.shape[:2]
    blob = cv2.dnn.blobFromImage(
        image,
        scalefactor=1.0,
        size=(blob_width, blob_height),
        mean=(123.68, 116.78, 103.94),
        swapRB=True,
        crop=False,
    )
    net.setInput(blob)
    scores, geometry = net.forward(["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"])

    boxes = decode(scores, geometry)

    bbox_list = []
    for box in boxes:
        x1, y1, x2, y2 = map(int, box)
        # Remove the padding offset first, then rescale to the original size.
        x1 = (x1 - left) * w_ratio
        x2 = (x2 - left) * w_ratio
        y1 = (y1 - top) * h_ratio
        y2 = (y2 - top) * h_ratio
        bbox_list.append({"class": "text", "bbox": [x1, y1, x2, y2]})
    return bbox_list



def get_dict_for_annotations():
    """Return a fresh, empty annotation-project skeleton.

    The result has three top-level sections: ``_via_settings`` (UI, core and
    project settings), ``_via_img_metadata`` (empty, to be filled per image)
    and ``_via_attributes`` (a single ``class`` region attribute of type
    checkbox). Presumably this mirrors the VGG Image Annotator (VIA) project
    file layout -- confirm against the VIA version in use.

    Every call builds new dict objects, so callers may mutate the result
    freely.
    """
    # Appearance of regions in the image-grid view.
    image_grid = {
        "img_height": 80,
        "rshape_fill": "none",
        "rshape_fill_opacity": 0.3,
        "rshape_stroke": "yellow",
        "rshape_stroke_width": 2,
        "show_region_shape": True,
        "show_image_policy": "all",
    }

    # Appearance of regions in the single-image view.
    image_view = {
        "region_label": "class",
        "region_color": "__via_default_region_color__",
        "region_label_font": "10px Sans",
        "on_image_annotation_editor_placement": "NEAR_REGION",
    }

    ui_settings = {
        "annotation_editor_height": 25,
        "annotation_editor_fontsize": 0.8,
        "leftsidebar_width": 18,
        "image_grid": image_grid,
        "image": image_view,
    }

    settings = {
        "ui": ui_settings,
        "core": {"buffer_size": 18, "filepath": {}, "default_filepath": ""},
        "project": {"name": "idcard_annotations"},
    }

    # The only region attribute is "class"; its options start with a single
    # placeholder entry whose key is the two-character string `""`.
    class_attribute = {
        "type": "checkbox",
        "description": "",
        "options": {'""': ''},
        "default_options": {},
    }

    annotations = {}
    annotations["_via_settings"] = settings
    annotations["_via_img_metadata"] = {}
    annotations["_via_attributes"] = {"region": {"class": class_attribute}, "file": {}}
    return annotations


def bboxes_to_via_annotations(bboxes):
    """Convert per-image detections into an annotation-project dict.

    Args:
        bboxes: Mapping of image path -> list of dicts shaped like
            ``{"class": <label>, "bbox": [x1, y1, x2, y2]}``. Each path must
            exist on disk because the file size is read from it.

    Returns:
        The skeleton from ``get_dict_for_annotations`` with one
        ``_via_img_metadata`` entry per image (keyed by file name followed by
        file size in bytes) and every class label seen registered under the
        ``class`` region attribute's options.

    Raises:
        OSError: Propagated from ``os.path.getsize`` when an image path does
            not exist or is inaccessible.
    """
    via_annotations_data = get_dict_for_annotations()
    class_options = via_annotations_data["_via_attributes"]["region"]["class"]["options"]

    for im_path, bbox in bboxes.items():
        # os.path.basename handles the platform's path separators, unlike a
        # manual split on "/".
        im_name = os.path.basename(im_path)
        im_size = os.path.getsize(im_path)
        im_key = im_name + str(im_size)

        regions = []
        for b in bbox:
            x1, y1, x2, y2 = b["bbox"][0], b["bbox"][1], b["bbox"][2], b["bbox"][3]
            regions.append(
                {
                    # Rectangles are stored as top-left corner plus size.
                    "shape_attributes": {
                        "name": "rect",
                        "x": x1,
                        "y": y1,
                        "width": x2 - x1,
                        "height": y2 - y1
                    },
                    "region_attributes": {
                        "class": {
                            b["class"]: True
                        }
                    }
                }
            )
            # Make sure the label exists as a selectable option.
            class_options[b["class"]] = ""

        via_annotations_data["_via_img_metadata"][im_key] = {
            "filename": im_name,
            "size": im_size,
            "regions": regions,
            "file_attributes": {}
        }
    return via_annotations_data



In [71]:
import tqdm

# Directory holding the images to annotate.
im_dir_path = "/content/test"

# The cloned repository directory and the model file share the same name.
model_path = '/content/frozen_east_text_detection.pb/frozen_east_text_detection.pb'
net = cv2.dnn.readNet(model_path)

# Match extensions ending in "g"/"G" (png, jpg, jpeg, ...). On a
# case-insensitive filesystem both patterns return the same files, so drop
# duplicates (keeping the first occurrence) to avoid running inference twice.
im_list = list(dict.fromkeys(
    glob.glob(os.path.join(im_dir_path, "*.*g"))
    + glob.glob(os.path.join(im_dir_path, "*.*G"))
))

# Run text detection on every image, keyed by its path.
bboxes = {}
for im_path in tqdm.tqdm(im_list):
    bboxes[im_path] = get_text(im_path, net)

via_annotaitons_data = bboxes_to_via_annotations(bboxes)
# Use a context manager so the file is flushed and closed deterministically.
with open("annotations.json", "w") as annotations_file:
    json.dump(via_annotaitons_data, annotations_file)
print("done")


100%|██████████| 12/12 [00:17<00:00,  1.45s/it]

done



