In [1]:
import numpy as np 
import matplotlib.pyplot as plt 
import pytesseract
from PIL import Image
import cv2
import pandas as pd 
import skimage
from pathlib import Path

## Dataset paths and loading

In [2]:
from common import *

## Load image

In [5]:
# Select matplotlib's Qt backend for every figure created below.
# NOTE(review): this needs a Qt binding and a desktop session; it will not
# work on a headless kernel.
%matplotlib qt

In [60]:
# NOTE(review): absolute, machine-specific path; consider deriving it from a
# configurable data directory so the notebook runs on other machines.
im_path = r"C:\Users\admpdi\Code\personal\CMP197\pid_detect\merged\peids_process-28.png"

# cv2.imread does not raise when the file is missing or unreadable; it returns
# None, which would otherwise only surface later as an obscure error.
im = cv2.imread(im_path)
if im is None:
    raise FileNotFoundError(f"Could not read image: {im_path}")

# `im` stays in OpenCV's BGR channel order because later cells convert it with
# COLOR_BGR2GRAY; only the displayed copy is converted to the RGB order that
# matplotlib expects.
plt.imshow(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))

<matplotlib.image.AxesImage at 0x271eadf2e50>

## Text Detection

In [52]:
# Text-detection entry points used by the cells below.
from craft_text_detector import get_prediction, load_craftnet_model, load_refinenet_model

# Instantiate both networks once (with cuda=True) so every window can reuse them.
craft_net = load_craftnet_model(cuda=True)
refine_net = load_refinenet_model(cuda=True)

In [61]:
# Pick a sliding-window size per axis: the image extent divided by the number
# of 1024-pixel windows that fit along it (never fewer than one window), so
# each axis uses min(extent, 1024) pixels.
im_hw = np.array(im.shape[:2])
n_windows = np.maximum(1.0, im_hw / 1024)

window = im_hw / n_windows
wh, ww = window                      # window height / width (floats)
sh, sw = (window / 2).astype(int)    # stride is half a window in each direction

print("Windows:", (wh, ww))
print("Stride:", (sh, sw))

Windows: (1024.0, 1024.0)
Stride: (512, 512)


In [62]:
from skimage.util import view_as_windows

def windows_to_image(windows_view):
    """Stitch every second window of a (rows, cols, h, w, ...) grid into one image.

    Only windows at even row/column indices are kept; they are laid out side
    by side in grid order. The result has shape
    (kept_rows * h, kept_cols * w, ...).
    """
    tiles = windows_view[::2, ::2]
    # Join the tiles of each grid row horizontally, then stack the rows vertically.
    tile_rows = [np.concatenate(list(row_of_tiles), axis=1) for row_of_tiles in tiles]
    return np.concatenate(tile_rows, axis=0)

# Binarise the image (inverted, Otsu-selected threshold) before tiling it.
gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
otsu_level, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)

# Grid of overlapping windows over the binary image: (rows, cols, win_h, win_w).
t = view_as_windows(thresh, (wh, ww), (sh, sw))

# (x, y) pixel offset of each window's top-left corner, indexed like the grid.
win_rows, win_cols = np.indices(t.shape[:2])
offsets = np.stack((sw * win_cols, sh * win_rows), axis=-1).astype(float)

# plt.imshow(thresh)
# plt.plot(offsets.reshape(-1,2)[:,0],offsets.reshape(-1,2)[:,1],"rx")

# Show the bottom-right window as a sanity check, then report the grid shape.
plt.imshow(t[-1, -1])
t.shape

(5, 8, 1024, 1024)

In [63]:
# Run the text detector on every window, in row-major grid order (the same
# order as `offsets.reshape(-1, 2)`).
#
# The tile shape is taken from the window view itself instead of casting the
# float `wh`/`ww`, and the comprehension variable does not leak, so the
# `window` size array computed in the earlier cell is no longer overwritten
# with the last tile.
outputs = [
    get_prediction(
        image=tile,
        craft_net=craft_net,
        refine_net=refine_net,
        cuda=True,
        poly=False,
    )
    for tile in t.reshape(-1, *t.shape[2:])
]

In [64]:
fig, axs = plt.subplots(1, 2, sharex=True, sharey=True)
# plt.imshow(thresh)
draw1 = cv2.cvtColor(thresh, cv2.COLOR_GRAY2RGB)  # detected boxes filled with black
draw2 = cv2.cvtColor(thresh, cv2.COLOR_GRAY2RGB)  # detected boxes outlined in red

# Per-window boxes translated into full-image coordinates.
offs_boxes = []
for offs, output in zip(offsets.reshape(-1, 2), outputs):
    if len(output["boxes"]) == 0:
        continue

    boxes = output["boxes"] + offs
    offs_boxes.append(boxes)

    # OpenCV contour points must be 32-bit integers. `.astype(int)` yields
    # int64 on Linux and under NumPy 2, which drawContours rejects, so the
    # width is pinned explicitly.
    contours = boxes[..., np.newaxis, :].astype(np.int32)
    draw1 = cv2.drawContours(draw1, contours, -1, (0, 0, 0), thickness=-1)
    draw2 = cv2.drawContours(draw2, contours, -1, (255, 0, 0), thickness=2)

axs[0].set_title("Detected regions masked")
axs[1].set_title("Detected regions outlined")
axs[0].imshow(draw1)
axs[1].imshow(draw2)

<matplotlib.image.AxesImage at 0x271eb024a30>

In [66]:
# NOTE(review): this duplicates the definition in the tiling cell above; keep
# a single copy (or keep the two in sync) to avoid silent shadowing.
def windows_to_image(windows_view):
    """Stitch every second window of a (rows, cols, h, w, ...) grid into one image.

    Only windows at even row/column indices are kept; they are laid out side
    by side in grid order. The result has shape
    (kept_rows * h, kept_cols * w, ...).
    """
    tiles = windows_view[::2, ::2]
    # Join the tiles of each grid row horizontally, then stack the rows vertically.
    tile_rows = [np.concatenate(list(row_of_tiles), axis=1) for row_of_tiles in tiles]
    return np.concatenate(tile_rows, axis=0)


fig, axs = plt.subplots(1, 3, sharex=True, sharey=True)

# One heatmap per window, stacked in the same row-major order as `outputs`.
text_heatmaps = np.stack([o["heatmaps"]["text_score_heatmap"] for o in outputs])
link_heatmaps = np.stack([o["heatmaps"]["link_score_heatmap"] for o in outputs])

# Restore the (grid_rows, grid_cols, ...) layout and stitch every second
# window into a single mosaic.
text_heatmaps = windows_to_image(text_heatmaps.reshape((*t.shape[:2], *text_heatmaps.shape[1:])))
link_heatmaps = windows_to_image(link_heatmaps.reshape((*t.shape[:2], *link_heatmaps.shape[1:])))

# The mosaic keeps only every second window, so it spans
# kept_tiles * window_size pixels of the source image. That can be smaller
# than the image itself (e.g. 8 grid columns -> 4 tiles -> 4096 px). Resizing
# to the full image size would stretch the heatmaps out of registration with
# the shared axes, so resize to the extent actually covered.
n_tile_rows = len(range(0, t.shape[0], 2))
n_tile_cols = len(range(0, t.shape[1], 2))
covered_size = (n_tile_cols * t.shape[3], n_tile_rows * t.shape[2])  # (width, height)

axs[0].set_title("Text score")
axs[1].set_title("Link score")
axs[2].set_title("Input image")
axs[0].imshow(cv2.resize(text_heatmaps, covered_size))
axs[1].imshow(cv2.resize(link_heatmaps, covered_size))
# `im` is BGR (cv2.imread); matplotlib expects RGB.
axs[2].imshow(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))

<matplotlib.image.AxesImage at 0x271fc7cea90>

In [42]:
# Inspect the link-score heatmap produced for the first window only.
first_window_heatmaps = outputs[0]["heatmaps"]
plt.imshow(first_window_heatmaps["link_score_heatmap"])

<matplotlib.image.AxesImage at 0x271a2ead5b0>

In [31]:
# Spot-check one detection: take corners 0 and 2 of box #4 and show the
# matching region of the binary image.
# NOTE(review): index 4 is arbitrary and raises IndexError with fewer than 5 boxes.
all_boxes = np.concatenate(offs_boxes, axis=0)
r = all_boxes[4, ::2].astype(int)
plt.imshow(thresh[rect_to_slice(r)])

<matplotlib.image.AxesImage at 0x271934a8460>

In [32]:
# Merge the per-window detections and suppress overlapping boxes.
boxes = np.concatenate(offs_boxes, axis=0)
print(len(boxes))

# Keep corners 0 and 2 of every 4-point box and flatten each pair to 4 numbers
# (presumably x1, y1, x2, y2 — confirm against non_max_suppression_fast).
corner_pairs = boxes[:, ::2]
boxes_nms = non_max_suppression_fast(corner_pairs.reshape(-1, 4), overlapThresh=0.4)
print(boxes_nms.shape)

34
(34, 4)


In [36]:
# OCR every box that survived NMS and keep only those whose recognised text is
# mostly alphabetic.
# for psm in (0, 1, 3, 4, 5, 6, 7, 11, 12, 13):
psm = 7
draw2 = cv2.cvtColor(thresh, cv2.COLOR_GRAY2RGB)
draw3 = cv2.cvtColor(thresh, cv2.COLOR_GRAY2RGB)
text_cleanup = thresh.copy()

boxes_filtered = []

for r in boxes_nms.reshape(-1, 2, 2):
    crop = im[rect_to_slice(r, margin=5)]

    # Deliberately not named `h, w`: at notebook scope that would overwrite
    # the global `w` array used by the evaluation cells.
    crop_h, crop_w = crop.shape[:2]

    # Clearly tall crops are rotated by 90 degrees before OCR.
    if crop_h > 1.3 * crop_w:
        crop = cv2.rotate(crop, cv2.ROTATE_90_CLOCKWISE)

    try:
        text = pytesseract.image_to_string(crop, config=f"--oem 3 --psm {psm}")
    except pytesseract.TesseractError:
        # One unreadable crop must not discard every remaining box (the
        # metrics below would then be computed on a truncated list).
        print("Oopsie from tesseract")
        continue

    if len(text) > 0:
        # NOTE(review): `text` is not stripped here, so any whitespace
        # Tesseract appends counts towards the denominator; the 0.4 threshold
        # is kept as-is.
        alpha_percent = alpha_count(text) / len(text)

        if alpha_percent < 0.4:
            # Rejected as "not text": drawn, but not kept.
            draw_rects(draw2, r, (255, 0, 15), thickness=1)
        else:
            draw_rects(draw2, r, (255, 0, 0), thickness=1)
            draw_rects(text_cleanup, r, 0, thickness=-1)
            boxes_filtered.append(r.flatten())

        # cv2.putText expects the origin as a pair of plain ints.
        origin = tuple(int(v) for v in r[0])
        cv2.putText(draw2, text.strip(), origin, cv2.FONT_HERSHEY_PLAIN, 1, (0, 255, 255))


plt.imshow(draw2)
plt.title("Detected")
# axs[1].imshow(text_cleanup)

Text(0.5, 1.0, 'Detected')

In [49]:
# Re-flow the subplots of `fig`.
# NOTE(review): `fig` is whichever figure the most recently executed
# `plt.subplots` cell created, so this depends on cell execution order.
fig.tight_layout()

In [47]:
# Ground-truth word boxes, one row of 4 coordinates per word.
# NOTE(review): `data` is not defined in this notebook's visible cells;
# presumably it comes from `from common import *` — confirm.
gt_boxes_raw = np.stack(data["words"]["box"])

# Drop degenerate boxes (zero width or zero height). The mask is built from
# the ground-truth array itself rather than from the global `w`, which is only
# defined in a later cell and is also overwritten by other cells.
has_area = (
    (np.abs(gt_boxes_raw[:, 0] - gt_boxes_raw[:, 2]) > 0)
    & (np.abs(gt_boxes_raw[:, 1] - gt_boxes_raw[:, 3]) > 0)
)
gts = gt_boxes_raw[has_area]

# First row: all boxes after NMS. Second row: boxes that also passed the OCR filter.
print("precision, recall")
print(np.round(detection_metrics(boxes_nms, gts, iou_thresh=0.5), 2))
print(np.round(detection_metrics(np.stack(boxes_filtered), gts, iou_thresh=0.5), 2))

precision, recall
[0.83 0.93]
[0.92 0.93]


In [48]:
# Number of ground-truth word boxes (before any filtering).
w = np.stack(data["words"]["box"])
len(w)

149