<a href="https://colab.research.google.com/github/shuvamg007/text_detection/blob/master/east_text_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import math
import os

import cv2
import numpy as np
from matplotlib import pyplot as plt
from tqdm import tqdm

In [0]:
%%capture
# Fetch the frozen EAST model and the sample images. Every step is written so
# the cell can be re-run on a kernel restart without prompting or failing.
# -O fixes the output name (the URL ends in "?dl=1") and overwrites old copies.
!wget -O frozen_east_text_detection.tar.gz "https://www.dropbox.com/s/r2ingd0l3zt8hxs/frozen_east_text_detection.tar.gz?dl=1"
!tar xvzf frozen_east_text_detection.tar.gz
# unzip -o: overwrite existing files instead of waiting for an interactive answer.
!wget -O text_detection.zip https://s3.amazonaws.com/tech-interview/text_detection.zip && unzip -o text_detection.zip
# mkdir -p: no error when the directory is already there.
!mkdir -p results

In [0]:
def detect(scores, geometry, threshold):
  """Decode raw EAST output maps into rotated boxes and their confidences.

  Requires the module-level `import math` (the function uses math.cos,
  math.sin and math.pi).

  Args:
    scores: array indexed as scores[0][0][y][x]; the per-cell text score.
    geometry: array indexed as geometry[0][c][y][x]. Channels 0-3 are box
      distances (0 + 2 gives the box height, 1 + 3 gives the box width) and
      channel 4 is the box rotation in radians.
    threshold: a cell is kept only if its score is strictly greater.

  Returns:
    A two-element list [detections, confidences]. Each detection is a
    ((center_x, center_y), (width, height), angle_in_degrees) tuple, the
    rotated-rectangle layout consumed by cv2.boxPoints and
    cv2.dnn.NMSBoxesRotated; confidences holds the matching scores as
    Python floats, in the same order.
  """
  detections = []
  confidences = []

  height = scores.shape[2]
  width = scores.shape[3]
  for y in range(height):

    # Slice out row y of every map once instead of re-indexing per cell.
    row_scores = scores[0][0][y]
    d0 = geometry[0][0][y]
    d1 = geometry[0][1][y]
    d2 = geometry[0][2][y]
    d3 = geometry[0][3][y]
    row_angles = geometry[0][4][y]

    for x in range(width):

      # continue only if score > threshold
      if not (row_scores[x] > threshold):
        continue

      angle = row_angles[x]
      cos = math.cos(angle)
      sin = math.sin(angle)

      h = d0[x] + d2[x]
      w = d1[x] + d3[x]

      # Each map cell is scaled by 4 to reach input-image coordinates
      # (presumably the network's output stride), then shifted along the
      # rotated axes to the reference corner of the box.
      offsetX = x * 4.0 + ((cos * d1[x]) + (sin * d2[x]))
      offsetY = y * 4.0 - ((sin * d1[x]) - (cos * d2[x]))

      # Two opposite corners of the rotated box; their midpoint is the center.
      p1 = (-(sin * h) + offsetX, -(cos * h) + offsetY)
      p3 = (-(cos * w) + offsetX,  (sin * w) + offsetY)
      center = (0.5 * (p1[0] + p3[0]), 0.5 * (p1[1] + p3[1]))

      # The angle is negated and converted from radians to degrees.
      detections.append((center, (w, h), -(angle * 180.0) / math.pi))
      confidences.append(float(row_scores[x]))

  return [detections, confidences]

In [0]:
east_model = 'frozen_east_text_detection.pb'

# EAST expects input images to be of dimensions in multiple of 32
img_width = 320
img_height = 320

res_images = []  # not populated in this cell
nms_threshold = 0.5
confidence = 0.4

# loading pre-trained model
model = cv2.dnn.readNet(east_model)
directory = 'text_detection/images/'

# Output layers requested from the network: text scores, then box geometry.
layers = [
  "feature_fusion/Conv_7/Sigmoid",
  "feature_fusion/concat_3"
]

# Per-channel mean subtracted by blobFromImage before inference.
dataset_mean = (123.68, 116.78, 103.94)

# Make sure the output directory exists so this cell does not depend on the
# setup cell's mkdir having succeeded.
os.makedirs('results', exist_ok=True)

for file in tqdm(os.listdir(directory)):

  image = cv2.imread(os.path.join(directory, file))
  if image is None:
    # cv2.imread returns None for unreadable / non-image files; skip them.
    continue

  img_copy = image.copy()
  (H, W) = image.shape[:2]

  # Scale factors to map boxes from network-input size back to the original.
  rW = W / float(img_width)
  rH = H / float(img_height)

  image = cv2.resize(image, (img_width, img_height))

  blob = cv2.dnn.blobFromImage(image, 1.0, (img_width, img_height), dataset_mean, swapRB=True, crop=False)
  model.setInput(blob)
  (scores, geometry) = model.forward(layers)

  detections, confidences = detect(scores, geometry, confidence)
  if detections:
    indices = cv2.dnn.NMSBoxesRotated(detections, confidences, confidence, nms_threshold)
  else:
    indices = []

  # Depending on the OpenCV version the kept indices come back as an Nx1
  # array, a flat array, or an empty tuple; flatten to handle all of them.
  for i in np.asarray(indices, dtype=int).reshape(-1):

    vertices = cv2.boxPoints(detections[i])

    # Rescale to the original image and convert to plain Python ints,
    # which is what the cv2 drawing functions expect for point coordinates.
    corners = np.rint(vertices * (rW, rH)).astype(int).tolist()

    for j in range(4):
      p1 = tuple(corners[j])
      p2 = tuple(corners[(j + 1) % 4])
      cv2.line(img_copy, p1, p2, (0, 255, 0), 2)

  cv2.imwrite(os.path.join('results', file), img_copy)