In [1]:
# import the necessary packages
from imutils.object_detection import non_max_suppression
import numpy as np
import argparse
import time
import cv2
import os

In [2]:
# --- Configuration ---------------------------------------------------------
# Location of the input picture, assembled from its folder and file name.
image_folder, image_name = 'images', 'lebron_james.jpg'
imagepath = os.path.join(image_folder, image_name)

# Serialized EAST text-detection network (path to the frozen graph).
model = 'frozen_east_text_detection.pb'

# Minimum text probability for a detection to be kept.
min_prob = 0.5

# Size the image is resized to before it is handed to the network.
height = width = 320

In [3]:
# Load the image from disk and record its original dimensions.
image = cv2.imread(imagepath)
if image is None:
    # cv2.imread reports failure by returning None instead of raising, which
    # would otherwise surface as an opaque AttributeError on image.copy().
    if not os.path.isfile(imagepath):
        raise FileNotFoundError("Image file not found: {}".format(imagepath))
    raise ValueError("Image file could not be decoded: {}".format(imagepath))

# Keep an untouched copy: detections are drawn on it at the end.
orig = image.copy()
(h, w) = image.shape[:2]

In [4]:
# Network input size, and the ratios needed to map boxes found on the resized
# image back onto the original one.
(newW, newH) = (width, height)

# Derive everything from the untouched copy `orig` rather than from the
# globals `h`, `w`, `image`: those are overwritten below (and by later cells),
# so reading them would make this cell give rW = rH = 1 when it is re-run.
(origH, origW) = orig.shape[:2]
rW = origW / float(newW)
rH = origH / float(newH)

# resize the image and grab the new image dimensions
image = cv2.resize(orig, (newW, newH))
(h, w) = image.shape[:2]

In [5]:
# Load the EAST text detector.
# `model` starts out as the path string from the configuration cell and is
# rebound to the loaded network here. The guard keeps the cell idempotent:
# on a re-run `model` is already a network and must not be passed to readNet.
if isinstance(model, str):
    model = cv2.dnn.readNet(model)

In [6]:
# Names of the two network outputs requested from the forward pass.
layerNames = [
    "feature_fusion/Conv_7/Sigmoid",  # first output: unpacked as `score`
    "feature_fusion/concat_3",        # second output: unpacked as `geometry`
]

In [7]:
# Build the network input blob from the resized image: no scaling, per-channel
# mean subtraction, and the first and last channels swapped (swapRB).
channel_means = (123.68, 116.78, 103.94)
blob = cv2.dnn.blobFromImage(
    image,
    scalefactor=1.0,
    size=(w, h),
    mean=channel_means,
    swapRB=True,
    crop=False,
)

In [8]:
# View the blob that is created.
# The blob is laid out as (batch, channels, rows, cols). Getting an image-like
# (rows, cols, channels) array requires moving the channel axis with a
# transpose; a plain reshape keeps the memory order and scrambles the pixels.
blobb = np.ascontiguousarray(np.transpose(blob[0], (1, 2, 0)))

# NOTE: the values are mean-subtracted floats, so the preview is not expected
# to look like the source photo; it only shows the spatial layout.
cv2.imshow('Blob', blobb)
cv2.waitKey(5000)  # keep the window open for up to 5 seconds
cv2.destroyAllWindows()

In [9]:
# obtain the output layers sets
# Run the blob through the network, timing the forward pass.
start = time.time()
model.setInput(blob)
score, geometry = model.forward(layerNames)
end = time.time()

elapsed = end - start
print("Text detection took : {} seconds".format(elapsed))


Text detection took : 0.3570098876953125 seconds


In [10]:
# Inspect the spatial size (rows, cols) of the score map returned by the network.
score.shape[2:4]

(80, 80)

In [11]:
# Inspect the full shape of the geometry output; axis 1 holds the five
# channels that the decoding loop indexes as geometry[0, 0..4, y].
geometry.shape

(1, 5, 80, 80)

In [12]:
# Grid dimensions of the score map, used as loop bounds when decoding.
num_rows, num_cols = score.shape[2:4]

In [13]:
# Decode the network outputs into candidate boxes (in resized-image
# coordinates) together with their text probabilities.
rects = []
confidence = []
for y in range(num_rows):
    # Row slices: text probability, the four distance channels used to size
    # and place the box, and the rotation angle.
    score_data = score[0, 0, y]
    x0 = geometry[0, 0, y]
    x1 = geometry[0, 1, y]
    x2 = geometry[0, 2, y]
    x3 = geometry[0, 3, y]
    angles_data = geometry[0, 4, y]

    for x in range(num_cols):
        # Skip this grid cell when its own probability is too low. (Testing
        # `score_data.all()` compared a row-wide boolean with the threshold
        # and therefore let practically every cell through.)
        if score_data[x] < min_prob:
            continue

        # The output maps are 4x smaller than the network input, so scale the
        # grid position back up to input-image coordinates.
        (offsetX, offsetY) = (x * 4.0, y * 4.0)

        # rotation angle predicted for this cell
        angle = angles_data[x]
        cos_a = np.cos(angle)
        sin_a = np.sin(angle)

        # Box size from the geometry volume: height pairs channels 0 and 2,
        # width pairs channels 1 and 3. Distinct names are used so the image
        # dimensions stored in the globals `h` and `w` are not clobbered.
        box_h = x0[x] + x2[x]
        box_w = x1[x] + x3[x]

        # Start and end (x, y) coordinates of the predicted text box; note
        # the minus sign on the sine term of the y coordinate.
        endX = int(offsetX + (cos_a * x1[x]) + (sin_a * x2[x]))
        endY = int(offsetY - (sin_a * x1[x]) + (cos_a * x2[x]))
        startX = int(endX - box_w)
        startY = int(endY - box_h)

        # record the bounding box coordinates and the probability score
        rects.append((startX, startY, endX, endY))
        confidence.append(score_data[x])

# Apply non-maxima suppression to drop weak, overlapping boxes. `probs` must
# be the per-box scores (one per entry of `rects`), not the threshold value.
boxes = non_max_suppression(np.array(rects), probs=confidence)
            
    
    

In [14]:
# Map every surviving box from resized-image coordinates back to the original
# image with the stored ratios, then outline it in green on `orig`.
for box in boxes:
    startX, startY, endX, endY = box
    top_left = (int(startX * rW), int(startY * rH))
    bottom_right = (int(endX * rW), int(endY * rH))
    cv2.rectangle(orig, top_left, bottom_right, (0, 255, 0), 2)

In [15]:
# Show the annotated original image, wait up to 5000 ms for a key press,
# then close every OpenCV window.
window_title = 'Text Detection'
display_ms = 5000
cv2.imshow(window_title, orig)
cv2.waitKey(display_ms)
cv2.destroyAllWindows()