In [None]:
import sys
sys.path.append('..')

In [None]:
import io
import math
from pathlib import Path

import numpy as np
import cv2 as cv
import pytesseract
from PIL import Image as PImage, ImageDraw, ImageOps

In [None]:
# Root of the data tree, one level above the working directory.
DATA_DIR = Path('..', 'data')

# Sub-directories holding the model file and the JPEG images globbed later.
MODEL_DIR = DATA_DIR.joinpath('model')
LABEL_DIR = DATA_DIR.joinpath('labels', 'typewritten')

# Model file that is loaded with cv.dnn.readNet further down.
MODEL = MODEL_DIR.joinpath('frozen_east_text_detection.pb')

In [None]:
def non_max_suppression(boxes, probs=None, overlapThresh=0.3):
    """Greedily discard boxes that overlap an already kept box too much.

    Boxes are visited from the highest ranked to the lowest. Each visited
    box is kept, and every remaining box whose overlap with it exceeds
    ``overlapThresh`` is removed from further consideration.

    Args:
        boxes: array-like of shape (N, 4); each row is (x1, y1, x2, y2),
            where (x1, y1) is the start corner and (x2, y2) the end corner.
        probs: optional sequence of N scores. When given, boxes with higher
            scores are visited first; otherwise boxes are ranked by their
            y2 coordinate (largest first).
        overlapThresh: a remaining box is dropped when its intersection
            with the kept box, divided by the remaining box's own area,
            is greater than this value.

    Returns:
        The kept boxes as an integer array of shape (K, 4), in the order
        they were picked, or an empty list when ``boxes`` is empty.
    """
    # accept plain lists/tuples of boxes as well as arrays
    boxes = np.asarray(boxes)

    # if there are no boxes, return an empty list
    if len(boxes) == 0:
        return []

    # Do the arithmetic in floating point. Both signed and unsigned integer
    # input is converted: with an unsigned dtype the subtractions below
    # would wrap around instead of going negative, which makes disjoint
    # boxes look as if they overlapped.
    if boxes.dtype.kind in "iu":
        boxes = boxes.astype("float")

    # grab the coordinates of the bounding boxes
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]

    # area of every box; the +1 treats the coordinates as inclusive
    area = (x2 - x1 + 1) * (y2 - y1 + 1)

    # rank by score when scores are provided, otherwise by the y2
    # coordinate; argsort is ascending, so the best candidate is last
    order = np.argsort(y2 if probs is None else probs)

    # indexes of the boxes that are kept
    pick = []

    # keep looping while some candidates remain
    while len(order) > 0:
        # take the best remaining candidate and keep it
        last = len(order) - 1
        i = order[last]
        pick.append(i)
        rest = order[:last]

        # intersection rectangle between the kept box and every other
        # remaining candidate
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])

        # width and height of the intersection (zero when disjoint)
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)

        # intersection area relative to each candidate's own area
        overlap = (w * h) / area[rest]

        # drop the kept box itself plus every candidate that overlaps it
        # by more than the threshold
        suppressed = np.concatenate(
            ([last], np.where(overlap > overlapThresh)[0]))
        order = np.delete(order, suppressed)

    # return only the bounding boxes that were picked
    return boxes[pick].astype("int")

In [None]:
# Collect every *.jpg file in the label directory. Sorting makes an index
# into the list refer to the same file as long as the directory contents
# do not change.
IMAGES = sorted(LABEL_DIR.glob('*.jpg'))

In [None]:
# Index (into IMAGES) of the image processed by the rest of the notebook;
# print its path so it is clear which file is being used.
IDX = 46
print(IMAGES[IDX])

In [None]:
# Minimum score a grid cell must reach to be kept when the network output
# is decoded into boxes; cells scoring below this are skipped.
MIN_CONF = 0.5

In [None]:
# Load the selected image from disk.
image = cv.imread(str(IMAGES[IDX]))

# cv.imread reports failure by returning None instead of raising, so stop
# here with a clear message rather than failing on the attribute access
# below.
if image is None:
    raise FileNotFoundError(
        'Could not read image: {}'.format(IMAGES[IDX]))

# Keep an untouched copy of the loaded pixels.
orig = image.copy()

# RGB version of the image for previewing with PIL (preview left disabled).
temp = cv.cvtColor(image, cv.COLOR_BGR2RGB)
# display(PImage.fromarray(temp))

In [None]:
# Height and width of the loaded image: the first two axes of the array.
H, W = image.shape[:2]
print(H, W)

In [None]:
# Round each dimension up to the nearest multiple of 32.
new_H, new_W = (32 * math.ceil(side / 32) for side in (H, W))
print(new_H, new_W)

In [None]:
# Total number of pixels to add along each axis.
extra_W = new_W - W
extra_H = new_H - H

# Split the extra pixels as evenly as possible between the two sides; when
# the amount is odd, the right/bottom side receives the additional pixel.
pad_left, pad_top = extra_W // 2, extra_H // 2
pad_right, pad_bottom = extra_W - pad_left, extra_H - pad_top

print(pad_top, pad_bottom, pad_left, pad_right)

In [None]:
# Surround the image with a constant black border of the computed sizes so
# that both dimensions become the rounded-up values.
# NOTE(review): this rebinds `image` to the padded result, so running this
# cell a second time without re-running the cells above pads the image
# again.
image = cv.copyMakeBorder(
    image,
    pad_top, pad_bottom, pad_left, pad_right,
    cv.BORDER_CONSTANT,
    value=(0, 0, 0))

# From here on H and W are the dimensions of the padded image.
(H, W) = image.shape[:2]

In [None]:
# Disabled experiment: convert `image` to grayscale.
# image = cv.cvtColor(image, cv.COLOR_BGR2GRAY)

# RGB copy of the padded image for previewing with PIL; `image` itself is
# left unchanged. The preview call below is disabled.
temp = cv.cvtColor(image, cv.COLOR_BGR2RGB)
# display(PImage.fromarray(temp))

In [None]:
# Load the network from the model file with OpenCV's dnn module.
net = cv.dnn.readNet(str(MODEL))

In [None]:
# Names of the two network outputs requested from the forward pass, in the
# order they are unpacked afterwards: first the scores, then the geometry.
layerNames = [
    "feature_fusion/Conv_7/Sigmoid",
    "feature_fusion/concat_3"]

In [None]:
# Turn the padded image into the input blob for the network:
#   - scale factor 1.0, i.e. pixel values are not rescaled,
#   - target size (W, H) equals the image's current size,
#   - the given per-channel mean values are subtracted,
#   - swapRB=True swaps the first and last channels,
#   - crop=False.
blob = cv.dnn.blobFromImage(
    image, 1.0, (W, H),
    (123.68, 116.78, 103.94),
    swapRB=True, crop=False)

In [None]:
# Feed the blob to the network and fetch the two requested outputs in one
# forward pass (same order as layerNames).
net.setInput(blob)
scores, geometry = net.forward(layerNames)

In [None]:
# Display the raw score array returned by the network.
scores

In [None]:
# Grid size of the score output: axes 2 and 3 of the array.
rows, cols = scores.shape[2], scores.shape[3]

# Accumulators for the decoding loop: one box and one score per accepted
# grid cell.
rects, confidences = [], []

In [None]:
# Start from empty accumulators so that re-running this cell does not
# append a second copy of every detection to lists left over from an
# earlier run.
rects = []
confidences = []

# loop over the rows of the score grid
for y in range(rows):
    # per-row slices of the scores, the four geometry channels used for
    # the box size/position, and the rotation angle
    scoresData = scores[0, 0, y]
    xData0 = geometry[0, 0, y]
    xData1 = geometry[0, 1, y]
    xData2 = geometry[0, 2, y]
    xData3 = geometry[0, 3, y]
    anglesData = geometry[0, 4, y]

    # loop over the number of columns
    for x in range(cols):
        # if our score does not have sufficient probability, ignore it
        if scoresData[x] < MIN_CONF:
            continue
        # compute the offset factor as our resulting feature maps will
        # be 4x smaller than the input image
        (offsetX, offsetY) = (x * 4.0, y * 4.0)
        # extract the rotation angle for the prediction and then
        # compute the sin and cosine
        angle = anglesData[x]
        cos = np.cos(angle)
        sin = np.sin(angle)
        # use the geometry volume to derive the width and height of
        # the bounding box (channels 0+2 give the height, 1+3 the width)
        h = xData0[x] + xData2[x]
        w = xData1[x] + xData3[x]
        # compute both the starting and ending (x, y)-coordinates for
        # the text prediction bounding box
        endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))
        endY = int(offsetY - (sin * xData1[x]) + (cos * xData2[x]))
        startX = int(endX - w)
        startY = int(endY - h)
        # add the bounding box coordinates and probability score to
        # our respective lists
        rects.append((startX, startY, endX, endY))
        confidences.append(scoresData[x])

In [None]:
# apply non-maxima suppression to suppress weak, overlapping bounding
# boxes
boxes = non_max_suppression(np.array(rects), probs=confidences)

# Draw every surviving box on the padded image (colour (0, 0, 255),
# 2 px thick). The network was fed the image at its own size (W, H), so
# the box coordinates already refer to `image` and need no rescaling.
for (startX, startY, endX, endY) in boxes:
    # hand cv.rectangle plain Python ints rather than NumPy scalars
    cv.rectangle(
        image,
        (int(startX), int(startY)),
        (int(endX), int(endY)),
        (0, 0, 255), 2)

In [None]:
# Convert the annotated image from BGR to RGB and render it inline through
# PIL.
temp = cv.cvtColor(image, cv.COLOR_BGR2RGB)
display(PImage.fromarray(temp))