In [1]:
from imutils.object_detection import non_max_suppression
import numpy as np
import time
import cv2

In [None]:
# Important: the EAST text detector requires the input image dimensions to be
# multiples of 32, so if you change newW / newH below, keep both values
# multiples of 32.

# load the input image and grab the image dimensions
imagePath = r"E:\AI Practice Project\cv\basic project\Dont-Forget-To-Pray-Tonight-8bit.png"
image = cv2.imread(imagePath)

# cv2.imread reports a missing or unreadable file by returning None instead of
# raising, so fail here with a clear message rather than later on image.copy()
if image is None:
    raise FileNotFoundError(f"could not read input image: {imagePath}")

# keep an unresized copy; the detected boxes are drawn on it at the end
orig = image.copy()
(H, W) = image.shape[:2]

# set the new width and height that the image is resized to for the network
(newW, newH) = (320, 320)

# determine the ratio in change for both the width and height
# (used later to map boxes from the resized image back onto the original)
rW = W / float(newW)
rH = H / float(newH)

# resize the image and grab the new image dimensions
image = cv2.resize(image, (newW, newH))
(H, W) = image.shape[:2]


# In order to perform text detection using OpenCV and the EAST deep learning
# model, we need to extract the output feature maps of two layers:
#   1. "feature_fusion/Conv_7/Sigmoid" - the output sigmoid activation, which
#      gives the probability of a region containing text or not
#   2. "feature_fusion/concat_3" - the output feature map representing the
#      "geometry" of the image, from which the bounding box coordinates of the
#      text in the input image are derived
layerNames = [
    "feature_fusion/Conv_7/Sigmoid",
    "feature_fusion/concat_3"]


# load the pre-trained EAST text detector
net = cv2.dnn.readNet(r"E:\AI Practice Project\cv\basic project\frozen_east_text_detection.pb")

# construct a blob from the image and then perform a forward pass of the model to obtain the two output layer sets
blob = cv2.dnn.blobFromImage(image, 1.0, (W, H), (123.68, 116.78, 103.94), swapRB=True, crop=False)

# measure the elapsed time with the monotonic high-resolution counter;
# time.time() follows the wall clock, which can be adjusted mid-measurement
start = time.perf_counter()
net.setInput(blob)
(scores, geometry) = net.forward(layerNames)
end = time.perf_counter()

# show timing information on text prediction
print(f"[INFO] text detection took {end - start} seconds")


# the last two axes of the scores volume give the grid of candidate cells
numRows, numCols = scores.shape[2:4]

# accumulators: one (startX, startY, endX, endY) tuple and one probability
# for every grid cell that passes the confidence check
confidences = []
rects = []

for y in range(numRows):
    # row views: the text probabilities, then the five geometry channels
    # (four box distances followed by the rotation angle)
    scoresData = scores[0, 0, y]
    xData0, xData1, xData2, xData3, anglesData = (
        geometry[0, channel, y] for channel in range(5)
    )

    for x in range(numCols):
        score = scoresData[x]

        # skip cells whose probability does not exceed 0.5 (min_confidence)
        if not (score > 0.5):
            continue

        # the feature maps are 4x smaller than the input image, so scale the
        # grid position back up to input-image pixels
        offsetX = x * 4.0
        offsetY = y * 4.0

        # sine and cosine of the predicted rotation angle
        angle = anglesData[x]
        cos, sin = np.cos(angle), np.sin(angle)

        # box height and width are the sums of the paired geometry channels
        h = xData0[x] + xData2[x]
        w = xData1[x] + xData3[x]

        # derive the ending (x, y) corner first, then step back by the box
        # size to get the starting corner
        endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
        endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
        startX = int(endX - w)
        startY = int(endY - h)

        # record the box together with its probability score
        rects.append((startX, startY, endX, endY))
        confidences.append(score)



# apply non-maxima suppression to suppress weak, overlapping bounding boxes
boxes = non_max_suppression(np.array(rects), probs=confidences)

# loop over the bounding boxes
for (startX, startY, endX, endY) in boxes:

    # scale the bounding box coordinates based on the respective ratios
    # (rW / rH are original size divided by resized size)
    startX = int(startX * rW)
    startY = int(startY * rH)
    endX = int(endX * rW)
    endY = int(endY * rH)

    # draw the bounding box on the image
    cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 255, 0), 2)

# show the output image and block until a key is pressed
cv2.imshow("Text Detection", orig)
cv2.waitKey(0)

# close the preview window once the key press is received; otherwise it is
# left open after the cell finishes
cv2.destroyAllWindows()

**video**

In [2]:
# Live text detection: read frames from capture device 0, run the EAST
# detector on each frame and show the frame with the detected boxes drawn on
# it. Press "q" in the preview window to stop.

# network input size every frame is resized to
# (the EAST model requires both values to be multiples of 32)
(newW, newH) = (320, 320)

# In order to perform text detection using OpenCV and the EAST deep learning
# model, we need to extract the output feature maps of two layers:
#   1. "feature_fusion/Conv_7/Sigmoid" - the output sigmoid activation, which
#      gives the probability of a region containing text or not
#   2. "feature_fusion/concat_3" - the output feature map representing the
#      "geometry" of the image, from which the bounding box coordinates of the
#      text in the input image are derived
layerNames = [
    "feature_fusion/Conv_7/Sigmoid",
    "feature_fusion/concat_3"]

# load the pre-trained EAST text detector once, before the loop; the model
# does not change between frames, so re-reading it per frame is wasted work
net = cv2.dnn.readNet(r"E:\AI Practice Project\cv\basic project\frozen_east_text_detection.pb")

cap = cv2.VideoCapture(0)

try:
    # fail with a clear message when the camera cannot be opened
    if not cap.isOpened():
        raise RuntimeError("could not open video capture device 0")

    while True:
        ret, frame = cap.read()

        # a failed read returns ret == False and no frame; stop instead of
        # crashing on frame.copy()
        if not ret or frame is None:
            break

        image = frame
        orig = image.copy()
        (H, W) = image.shape[:2]

        # determine the ratio in change for both the width and height
        rW = W / float(newW)
        rH = H / float(newH)

        # resize the image and grab the new image dimensions
        image = cv2.resize(image, (newW, newH))
        (H, W) = image.shape[:2]

        # construct a blob from the image and then perform a forward pass of the model to obtain the two output layer sets
        blob = cv2.dnn.blobFromImage(image, 1.0, (W, H), (123.68, 116.78, 103.94), swapRB=True, crop=False)

        net.setInput(blob)
        (scores, geometry) = net.forward(layerNames)

        # grab the number of rows and columns from the scores volume, then initialize our set of bounding box rectangles and corresponding confidence scores
        (numRows, numCols) = scores.shape[2:4]
        rects = []
        confidences = []

        # loop over the number of rows
        for y in range(0, numRows):
            # extract the scores (probabilities), followed by the geometrical data used to derive potential bounding box coordinates that surround text
            scoresData = scores[0, 0, y]
            xData0 = geometry[0, 0, y]
            xData1 = geometry[0, 1, y]
            xData2 = geometry[0, 2, y]
            xData3 = geometry[0, 3, y]
            anglesData = geometry[0, 4, y]

            # loop over the number of columns
            for x in range(0, numCols):

                # if our score does not have sufficient probability, ignore it
                if scoresData[x] < 0.5:		 # 0.5 ==> min_confidence
                    continue

                # compute the offset factor as our resulting feature maps will be 4x smaller than the input image
                (offsetX, offsetY) = (x * 4.0, y * 4.0)

                # extract the rotation angle for the prediction and then compute the sin and cosine
                angle = anglesData[x]
                cos = np.cos(angle)
                sin = np.sin(angle)

                # use the geometry volume to derive the width and height of the bounding box
                h = xData0[x] + xData2[x]
                w = xData1[x] + xData3[x]

                # compute both the starting and ending (x, y)-coordinates for the text prediction bounding box
                endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
                endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
                startX = int(endX - w)
                startY = int(endY - h)

                # add the bounding box coordinates and probability score to our respective lists
                rects.append((startX, startY, endX, endY))
                confidences.append(scoresData[x])

        # apply non-maxima suppression to suppress weak, overlapping bounding boxes
        boxes = non_max_suppression(np.array(rects), probs=confidences)

        # loop over the bounding boxes
        for (startX, startY, endX, endY) in boxes:

            # scale the bounding box coordinates based on the respective ratios
            startX = int(startX * rW)
            startY = int(startY * rH)
            endX = int(endX * rW)
            endY = int(endY * rH)

            # draw the bounding box on the image
            cv2.rectangle(orig, (startX, startY), (endX, endY), (0, 255, 0), 2)

        # show the output image
        cv2.imshow("Text Detection", orig)

        # poll the keyboard for 1 ms; "q" ends the loop
        key = cv2.waitKey(1) & 0xff
        if key == ord('q'):
            break
finally:
    # always free the camera and close the window, including when the loop is
    # left through an exception or a kernel interrupt
    cap.release()
    cv2.destroyAllWindows()