In [23]:
import numpy as np 
import matplotlib.pyplot as plt 
from PIL import Image
import cv2
import pandas as pd 
import skimage
from pathlib import Path
from common import *

In [2]:
def pltcnt(c, ax=None, **kwargs):
    """Plot an OpenCV-style contour as a polyline.

    Parameters
    ----------
    c : sequence of points, each read as ``p[0][0]`` (x) and ``p[0][1]`` (y),
        i.e. the nesting used by the contours returned from ``cv2.findContours``.
    ax : optional object with a ``plot`` method (e.g. a matplotlib Axes).
        When ``None``, ``plt.plot`` is used instead.
    **kwargs : forwarded unchanged to ``plot``.
    """
    xs = [p[0][0] for p in c]
    ys = [p[0][1] for p in c]
    # Compare against None explicitly: the truthiness of an axes-like object
    # is not a reliable "was it passed?" signal.
    target = plt if ax is None else ax
    target.plot(xs, ys, **kwargs)

# Dataset: Digitize-P&ID

Path

In [24]:
# Pick which dataset sample to work on and load it.
# NOTE(review): `load_sample` is not defined in this notebook; presumably it
# comes from `from common import *` — confirm what the two returned values hold.
image_id = 2
im, data = load_sample(image_id)

In [3]:
# Dataset root directory, given relative to the working directory.
dbpath = Path("../DigitizePID_Dataset")

Paths and helpers for masks, images, and annotation info

In [4]:
# Image and mask folders under the dataset root, with their file extensions.
imagepath = dbpath /  "image_2"
imageformat = "jpg"
maskpath  = dbpath /  "mask"
maskformat = "png"

def im2mask(image):
    """Return the mask path that belongs to an image path.

    The mask lives in ``maskpath`` and is named
    ``<image stem>_mask.<maskformat>``.
    """
    mask_name = f"{image.stem}_mask.{maskformat}"
    return maskpath / mask_name
def mask2im(mask):
    """Return the image path that belongs to a mask path (inverse of ``im2mask``).

    ``im2mask`` names masks ``<stem>_mask.<maskformat>``, so the ``_mask``
    suffix is stripped from the stem before the image file name is built.
    A stem without that suffix is used unchanged.
    """
    suffix = "_mask"
    stem = mask.stem
    if stem.endswith(suffix):
        stem = stem[: -len(suffix)]
    return imagepath / f"{stem}.{imageformat}"
def im2info(image):
    """Load every annotation table stored for ``image``.

    Each ``*.npy`` file found in ``dbpath / image.stem`` is loaded and wrapped
    in a DataFrame. The dictionary key is the part of the file stem after the
    last underscore.

    NOTE: files are loaded with ``allow_pickle=True``, which can execute
    arbitrary code from a crafted file; only use this on trusted data.
    """
    annotation_dir = dbpath / image.stem
    tables = {}
    for npy_path in annotation_dir.glob("*.npy"):
        key = npy_path.stem.rsplit("_", 1)[-1]
        tables[key] = pd.DataFrame(np.load(str(npy_path), allow_pickle=True))
    return tables

In [5]:
%matplotlib tk

def draw_rects(img, tl_br_points, color=(255,0,0), **kwargs):
    """Draw rectangles on ``img`` in place.

    Parameters
    ----------
    img : image passed straight to ``cv2.rectangle`` (modified in place).
    tl_br_points : iterable of ``(p1, p2)`` corner pairs.
    color : colour forwarded to ``cv2.rectangle``.
    **kwargs : extra ``cv2.rectangle`` options such as ``thickness``.
    """
    for p1, p2 in tl_br_points:
        # Depending on the OpenCV build, cv2.rectangle can reject NumPy arrays
        # or NumPy scalars as points; plain int tuples avoid that.
        pt1 = tuple(int(v) for v in p1)
        pt2 = tuple(int(v) for v in p2)
        cv2.rectangle(img, pt1, pt2, color=color, **kwargs)

# for image in imagepath.glob(f"*.{imageformat}"):

# Work on the file named "2" in the image folder.
image = imagepath / f"2.{imageformat}"

# Load the drawing and its mask (the mask as single-channel grayscale).
# NOTE(review): cv2.imread returns None instead of raising when a file is
# missing, and yields BGR channel order — confirm both are handled as intended.
im = cv2.imread(str(image))
mask = cv2.imread(str(im2mask(image)), cv2.IMREAD_GRAYSCALE)

# Crop both to the region of interest: rows 250..4300, columns 375..5630.
# 375, 250
# 5630, 4300
roi = (slice(250, 4300), slice(375, 5630))
im = im[roi]
mask = mask[roi]

# (x, y) of the ROI's top-left corner; later cells subtract it from
# annotation coordinates so they line up with the cropped image.
tr = np.array([375,250])

# im 
# Contours of the binarised mask (any non-zero pixel counts as foreground).
contours, hierarchy = cv2.findContours(255*(mask>0).astype(np.uint8), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)


# Annotation tables for this image, plus a scratch copy of the crop to draw on.
data = im2info(image)
draw = im.copy()

# Linetypes
# Split the "lines" table by the value in column 3 ('solid' / 'dashed').
# Column 1 holds the coordinates: they are reshaped to one (2, 2) array per
# line and shifted by `tr` into the cropped image's frame.
solid_lines = np.stack(data["lines"][data["lines"][3] == 'solid'].iloc[:,1]).reshape(-1,2,2) - tr
dashed_lines = np.stack(data["lines"][data["lines"][3] == 'dashed'].iloc[:,1]).reshape(-1,2,2) - tr
def draw_pipelines(image):
    """Return a copy of ``image`` with the pipeline lines drawn on it.

    ``solid_lines`` are drawn with colour (255, 255, 0) and ``dashed_lines``
    with (0, 255, 255), both at thickness 2. The input image is not modified.
    """
    canvas = image.copy()
    layers = (
        (solid_lines, (255, 255, 0)),
        (dashed_lines, (0, 255, 255)),
    )
    for segments, colour in layers:
        canvas = cv2.drawContours(canvas, segments, -1, colour, thickness=2)
    return canvas
# cnts2 = np.stack(data["lines2"].iloc[:,:3]).reshape(-1,2,2)


# 
# Second line table: its first four columns are reshaped into (2, 2) point
# pairs, shifted by `tr`, and drawn onto `draw`.
cnts2 = data["lines2"].iloc[:,:4].to_numpy().reshape(-1,2,2) - tr
draw = cv2.drawContours(draw, cnts2, -1, (0, 255, 255), thickness=2)


def draw_symbols(image):
    """Return a copy of ``image`` with every symbol box outlined.

    Rows of ``data["symbols"]`` are grouped by column ``2`` and each group gets
    its own random colour. Boxes are taken from column 1, shifted by ``tr``
    and drawn with thickness 2 through ``draw_rects``.
    """
    canvas = image.copy()
    for _, members in data["symbols"].groupby(2):
        rgb = (np.random.rand(3)*255).astype(np.uint8)
        corner_pairs = np.stack(members.iloc[:,1]).reshape(-1,2,2) - tr
        draw_rects(canvas, corner_pairs, color=list(map(int, rgb)), thickness=2)
    return canvas

# Input
# Word boxes from column 1 of the "words" table, reshaped to two points per
# box and shifted by `tr` into the cropped frame.
text_boxes = np.stack(data["words"].iloc[:,1]).reshape(-1,2,2) - tr

# Sort X and Y coords inside each rect
# (sorting along axis 1 orders x and y independently across the two points,
# so each box becomes [(min x, min y), (max x, max y)]).
text_boxes = np.sort(text_boxes.reshape(-1,2,2),axis=1)
def draw_text_boxes(image, color=(255,0,255), thickness=1):
    """Return a copy of ``image`` with every ``text_boxes`` rectangle drawn.

    ``color`` and ``thickness`` are forwarded to ``draw_rects``; the input
    image itself is left untouched.
    """
    annotated = image.copy()
    draw_rects(annotated, text_boxes, thickness=thickness, color=color)
    return annotated
# draw = cv2.drawContours(draw, text, -1, (255, 0, 0), thickness=2)


# draw = cv2.drawContours(draw, contours, -1, (255, 0, 0), thickness=2)
# draw = cv2.drawContours(draw, symbols, -1, (0, 0, 255), thickness=2)


# im = cv2.imread("test.jpg")
# Layer pipelines, symbols and text boxes onto the canvas and show the result.
draw = draw_pipelines(draw)
draw = draw_symbols(draw)
draw = draw_text_boxes(draw)
plt.imshow(draw)


<matplotlib.image.AxesImage at 0x23313b8f0a0>

## Thresholding

In [6]:
# Grayscale as the per-pixel channel mean, then an Otsu threshold with inverted
# polarity: pixels that are not above the threshold become 255.
gray = np.mean(im,axis=-1).astype(np.uint8)
t, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
plt.imshow(thresh)

<matplotlib.image.AxesImage at 0x23313d840a0>

## Edges

In [7]:
# Sobel derivatives and gradient magnitude.
# NOTE(review): cv2.Sobel's last two arguments are (x order, y order), so the
# variable named `dx` holds the y-derivative and `dy` the x-derivative. The
# magnitude below is unaffected by the swap.
dx = cv2.Sobel(gray,cv2.CV_32FC1, 0,1)
dy = cv2.Sobel(gray,cv2.CV_32FC1, 1,0)
grad = np.sqrt(dx**2+dy**2)

fig, axs = plt.subplots(1,2, sharex=True, sharey=True)
# Scale the magnitude to 0..255 and binarise it with Otsu.
t, grad_thresh = cv2.threshold((grad*255/grad.max()).astype(np.uint8), 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
axs[0].imshow(grad_thresh)
axs[1].imshow(grad)

<matplotlib.image.AxesImage at 0x233133180a0>

In [8]:
# Morphological gradient of the binary image with a 7x7 elliptical kernel.
# Note: this rebinds `grad`, replacing the Sobel magnitude from the previous cell.
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, ksize=(7,7))

grad = cv2.morphologyEx(thresh, cv2.MORPH_GRADIENT, kernel)
plt.imshow(grad)

<matplotlib.image.AxesImage at 0x233170415b0>

In [9]:
fig, axs = plt.subplots(1,2, sharex=True, sharey=True)

# Skeletonise the foreground; `thresh//255` turns the 0/255 mask into 0/1.
skel = skimage.morphology.skeletonize(thresh//255, method="lee")
axs[0].imshow(skel)
axs[1].imshow(thresh)

<matplotlib.image.AxesImage at 0x23316eb4490>

In [None]:
contours, hierarchy = cv2.findContours(255-thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

plt.imshow(thresh)
im2 = np.zeros_like(im)

# hierarchy is [next, previous, firstChild, parent]
parents = hierarchy[0,:,-1]
# Draw the contours whose parent is contour 0.
# NOTE(review): an earlier comment described this as "contours with no
# parents", but a missing parent is encoded as -1 — confirm which is intended.
# Contours have different lengths, so they are filtered with a list
# comprehension: indexing np.array(contours) with a mask fails on ragged input
# in recent NumPy releases.
cv2.drawContours(im2, [c for c, p in zip(contours, parents) if p == 0], -1, color=(255,255,255), thickness=1)
for c in contours:
    pltcnt(c, color="r")

plt.figure()
plt.imshow(im2)

## Hough circles

In [10]:
# Run the circular Hough transform on the skeleton; `piece` selects everything.
piece = (slice(None,None), slice(None,None))
hough_input = skel[piece]

# Candidate radii: 55.0 to 62.0 in steps of 0.5.
radii = np.arange(110,125)/2
# radii = np.arange(10,20)
circles = skimage.transform.hough_circle(hough_input, radii, normalize=False)
# Accumulator cells ordered from most to fewest votes; only the last two
# indices of each cell are kept, as `i` and `j`.
_, i, j = np.unravel_index(np.argsort(-circles.ravel()),circles.shape)

In [11]:
# Extract circle peaks; the minimum x and y separation between centres is the
# smallest candidate radius, truncated to an integer.
accum, cx, cy, rad = skimage.transform.hough_circle_peaks(circles, radii, min_xdistance=np.min(radii).astype(int), min_ydistance=np.min(radii).astype(int))

In [29]:
# fig, axs = plt.subplots(1,2, sharex=True, sharey=True)

plt.imshow(im[piece], cmap="gray")
draw = np.zeros_like(im[piece])

# Max diff.
# Cut-off: the accumulator value that follows the largest jump between
# consecutive peak scores.
lim = accum[1+np.argmax(np.abs(np.diff(accum)))]

# Keep only the peaks strictly above the cut-off.
fit = accum > lim

# Boxes [x-r, y-r, x+r, y+r] of the accepted circles.
boxes = []
for x,y, r, f, a in zip(cx, cy, rad, fit, accum):
    # draw =cv2.circle(draw, (int(x),int(y)), int(r), color=(255,0,0))
# plt.plot(cx[:50],cy[:50],"rx")
    # Score relative to the strongest peak; it drives colour and line width.
    a = a/accum.max()
    if f:
        plt.gca().add_patch(plt.Circle((x,y), r, color=(a if f else 0,0,0), fill=False, linewidth=a*3))
        boxes.append([x-r,y-r, x+r,y+r])

# plt.imshow(hough_input)
plt.title("Círculos detectados")
plt.tight_layout()

Exception in Tkinter callback
Traceback (most recent call last):
  File "C:\Users\admpdi\Code\su-visao\WPy64-3880\python-3.8.8.amd64\lib\tkinter\__init__.py", line 1892, in __call__
    return self.func(*args)
  File "C:\Users\admpdi\Code\su-visao\WPy64-3880\python-3.8.8.amd64\lib\tkinter\__init__.py", line 814, in callit
    func(*args)
  File "c:\Users\admpdi\Code\personal\CMP197\pid_detect\.venv\lib\site-packages\matplotlib\backends\_backend_tk.py", line 489, in delayed_destroy
    self.window.destroy()
  File "C:\Users\admpdi\Code\su-visao\WPy64-3880\python-3.8.8.amd64\lib\tkinter\__init__.py", line 2312, in destroy
    self.tk.call('destroy', self._w)
_tkinter.TclError: can't invoke "destroy" command: application has been destroyed


In [27]:
# Ground-truth symbol boxes and integer class ids.
# NOTE(review): `data["symbols"]` is indexed here by "box"/"class" names, while
# the im2info-based cells index it by integer columns, so this cell appears to
# need the `data` returned by `load_sample` — confirm the cell order.
gt_boxes = np.stack(data["symbols"]["box"])
gt_classes = np.stack(data["symbols"]["class"]).astype(int)

# Keep only the boxes whose class id is in the listed set.
# NOTE(review): the name suggests these ids are instrumentation symbols — confirm.
instrumentation_boxes = [box for i, box in enumerate(gt_boxes) if gt_classes[i] in { 26, 27, 28, 29, 31 } ]

# `detection_metrics` is not defined in this notebook; presumably from `common`.
detection_metrics(boxes, instrumentation_boxes)

(1.0, 1.0)

In [19]:
# Re-apply tight layout to the current figure.
plt.tight_layout()

In [None]:
# skimage.draw.circle_perimeter takes the centre as (row, col), i.e. (y, x),
# and returns (row indices, column indices). x, y and r hold the values left
# over from the last iteration of the circle loop.
circ_y, circ_x = skimage.draw.circle_perimeter(int(y),int(x),int(r), shape=draw.shape)
plt.imshow(im)
m = np.zeros_like(im)
# Rows first, then columns, to match NumPy image indexing.
m[circ_y, circ_x]=(255,0,0)
plt.imshow(m)

In [None]:
# Peak scores and their consecutive differences, to see where the scores drop.
plt.plot(accum)
plt.plot(np.diff(accum))

In [None]:
# Display the raw peak accumulator values.
accum

# Graph?


In [None]:
# Trace contours on the skeleton and plot each one over a blank canvas.
contours, hierarchy = cv2.findContours(skel, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

im2 = np.zeros_like(im)
# hierarchy is [next, previous, firstChild, parent]
# draw contours with no parents
# cv2.drawContours(im2, contours, -1, color=(255,255,255), thickness=1)
plt.imshow(im2)
for c in contours:
    pltcnt(c)

# plt.figure()
# plt.imshow(im2)

## Morphology hats

In [None]:
def draw_gt(image):
    """Overlay all ground-truth layers on ``image`` and return the result.

    The layers are applied in this order: pipelines, symbols, text boxes.
    """
    with_pipelines = draw_pipelines(image)
    with_symbols = draw_symbols(with_pipelines)
    return draw_text_boxes(with_symbols)

In [None]:
fig, axs = plt.subplots(1,2, sharex=True, sharey=True)

# Rebuild the binary image and its skeleton from the cropped drawing.
gray = np.mean(im,axis=-1).astype(np.uint8)
t, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
skel = skimage.morphology.skeletonize(thresh//255, method="lee")
# skel = thresh

# 25x25 kernel for the black-hat; 3x3 kernel for the open/close clean-up
# (despite its name, `closing_kern` is used for both).
kern = cv2.getStructuringElement(cv2.MORPH_RECT, ksize=(25,25))
closing_kern = cv2.getStructuringElement(cv2.MORPH_RECT, ksize=(3,3))
# kern = cv2.getStructuringElement(cv2.MORPH_RECT, ksize=(5,5))

blackhat = cv2.morphologyEx(skel, cv2.MORPH_BLACKHAT, kern)

blackhat = cv2.morphologyEx(blackhat, cv2.MORPH_OPEN, closing_kern)
blackhat = cv2.morphologyEx(blackhat, cv2.MORPH_CLOSE, closing_kern)
# tophat = cv2.morphologyEx(skel, cv2.MORPH_TOPHAT, kern)

# Overlay the ground-truth symbol boxes on the black-hat result for comparison.
draw = draw_symbols(cv2.cvtColor(blackhat*255, cv2.COLOR_GRAY2RGB))

axs[0].imshow(skel)
axs[1].imshow(draw)

# Text

In [None]:
from craft_text_detector import (
    load_craftnet_model,
    load_refinenet_model,
    get_prediction,
)

# load models
# Both CRAFT networks are loaded with CUDA disabled.
refine_net = load_refinenet_model(cuda=False)
craft_net = load_craftnet_model(cuda=False)

In [None]:
# Sliding-window geometry: each window is 1/5 of the image height and width,
# and the stride is half a window.
# Note: `window`, `wh` and `ww` are floats here (true division); later cells
# cast them with int() where an integer is needed.
window = np.array(im.shape[:2])/5
wh, ww = window
sh, sw = (window/2).astype(int)

In [None]:
from skimage.util import view_as_windows
# plt.imshow(thresh[:1024,:1024])

# Cut the binary image into overlapping windows; `t[0,0]` is the first window.
# Note: this rebinds `t`, which earlier held the Otsu threshold value.
t = view_as_windows(thresh, (wh,ww), (sh,sw))

plt.imshow(t[0,0])
t.shape

In [None]:
# Run the CRAFT text detector on every window, in flattened window order.
# Note: the loop variable rebinds the global `window` defined earlier.
outputs = []
for window in t.reshape(-1,int(wh),int(ww)):
    prediction_result = get_prediction(
        image=window,
        craft_net=craft_net,
        refine_net=refine_net,
        cuda=False,
        poly=False
    )
    outputs.append(prediction_result)

In [None]:
# (x, y) offset of every window: column index times the horizontal stride,
# row index times the vertical stride.
offsets = np.zeros((*t.shape[:2],2))
for i in range(t.shape[0]):
    for j in range(t.shape[1]):
        offsets[i,j] = (sw*j, sh*i)

# Mark the window origins on top of the binary image.
plt.imshow(thresh)
plt.plot(offsets.reshape(-1,2)[:,0],offsets.reshape(-1,2)[:,1],"rx")

In [None]:
fig, axs = plt.subplots(1,2, sharex=True, sharey=True)
# plt.imshow(thresh)
# draw1 gets the detections filled in black; draw2 gets them outlined.
draw1 = cv2.cvtColor(thresh,cv2.COLOR_GRAY2RGB)
draw2 = cv2.cvtColor(thresh,cv2.COLOR_GRAY2RGB)


# Detections of every window, shifted by that window's offset.
offs_boxes = []
for offs, output in zip(offsets.reshape(-1,2), outputs):
    if len(output["boxes"])>0:
        boxes = output["boxes"] + offs

        offs_boxes.append(boxes)
        # `np.int` was removed from NumPy, and cv2.drawContours requires
        # 32-bit integer points, so convert explicitly to np.int32.
        polys = boxes[...,np.newaxis,:].astype(np.int32)
        draw1 = cv2.drawContours(draw1,polys,-1, (0,0,0), thickness=-1)
        draw2 = cv2.drawContours(draw2,polys,-1, (255,0,0), thickness=5)

axs[0].imshow(draw1)
axs[1].imshow(draw2)

In [None]:
def rect_to_slice(rect_pts, margin=0):
    """
    Convert cv-style rect to numpy-style slice

    Parameters
    ----------
    rect_pts : ``((x0, y0), (x1, y1))`` corner points.
    margin : amount added on every side of the rectangle.

    Returns
    -------
    ``(row_slice, col_slice)`` for indexing an image as ``image[rows, cols]``.

    The lower bounds are clamped at 0. A negative slice start counts from the
    end of the axis in NumPy, so a box near the top/left border would
    otherwise select the wrong (usually empty) region.
    """
    (x0, y0), (x1, y1) = rect_pts
    top = max(0, int(y0) - margin)
    left = max(0, int(x0) - margin)
    return (slice(top, int(y1) + margin), slice(left, int(x1) + margin))

# Take detection number 4, keep its points 0 and 2 (presumably opposite
# corners — confirm), and show that crop of the binary image.
r = np.vstack(offs_boxes)[4, 0::2].astype(int)
plt.imshow(thresh[rect_to_slice(r)])

In [None]:
import numpy as np
# Malisiewicz et al.

def non_max_suppression_fast(boxes, overlapThresh):
    """Greedy non-maximum suppression (Malisiewicz et al.).

    Parameters
    ----------
    boxes : array-like of shape (N, 4) with rows ``[x1, y1, x2, y2]``.
    overlapThresh : a remaining box is dropped when its intersection with the
        box just picked covers more than this fraction of its own area.

    Returns
    -------
    ``[]`` when ``boxes`` is empty; otherwise the kept rows as an integer
    array, in the order they were picked (largest ``y2`` first).
    """
    # if there are no boxes, return an empty list
    if len(boxes) == 0:
        return []
    # accept plain lists as well as arrays
    boxes = np.asarray(boxes)
    # Work in floats for every integer input. Unsigned types need this too:
    # `xx2 - xx1 + 1` is negative for disjoint boxes and would wrap around.
    if boxes.dtype.kind in "iu":
        boxes = boxes.astype("float")
    # indexes of the boxes that are kept
    pick = []
    # grab the coordinates of the bounding boxes
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    # compute the area of the bounding boxes and sort the boxes by their
    # bottom-right y-coordinate
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    idxs = np.argsort(y2)
    while len(idxs) > 0:
        # pick the last remaining index (largest y2)
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)
        rest = idxs[:last]
        # intersection rectangle between the picked box and every other box
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])
        # width and height of the intersection (0 when the boxes are disjoint)
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)
        # fraction of each remaining box covered by the picked box
        overlap = (w * h) / area[rest]
        # drop the picked index and every box that overlaps too much
        idxs = np.delete(idxs, np.concatenate(([last],
            np.where(overlap > overlapThresh)[0])))
    # return only the picked boxes, using the integer data type
    return boxes[pick].astype("int")

In [None]:
# Stack all window detections, keep points 0 and 2 of each one flattened to
# four numbers per box, and suppress overlapping boxes.
boxes = np.vstack(offs_boxes)
print(len(boxes))
boxes_nms = non_max_suppression_fast(boxes[:,0::2].reshape(-1,4), overlapThresh=0.4)
print(boxes_nms.shape)

### Text iou

In [None]:
fig, axs = plt.subplots(2,2, sharex=True, sharey=True)

# Filled masks (value 255) of the detected and the ground-truth text boxes.
text_mask = np.zeros_like(thresh)

gt_text_mask = np.zeros_like(thresh)
gt_text_mask = draw_text_boxes(gt_text_mask, color=255, thickness=-1)

draw_rects(text_mask, boxes_nms.reshape(-1,2,2), 255, thickness=-1)

axs[0,0].imshow(text_mask)
axs[0,0].set_title("Detected")
axs[0,1].imshow(gt_text_mask)
axs[0,1].set_title("GT")

intersection=text_mask & gt_text_mask
axs[1,0].imshow(intersection)
axs[1,0].set_title("Intersection")

union=text_mask | gt_text_mask
axs[1,1].imshow(union)
axs[1,1].set_title("Union")

# Pixel-level IoU of the two masks.
# Note: this divides by zero if both masks are empty.
print("iou=")
print(np.count_nonzero(intersection)/np.count_nonzero(union))

In [None]:
from mean_average_precision import MetricBuilder

metric = MetricBuilder.build_evaluation_metric("map_2d", async_mode=True, num_classes=1)

# Word boxes from the "words" table, shifted by `tr`.
text = np.stack(data["words"].iloc[:,1]).reshape(-1,2,2) - tr

gt = text.reshape(-1,4)

# preds = np.hstack((boxes_nms, np.zeros((boxes_nms.shape[0],2))))
# The ground-truth boxes themselves are used as predictions here, with two
# zero columns appended; the ground truth gets three zero columns.
preds = np.hstack((gt, np.zeros((gt.shape[0],2))))
gt = np.hstack((gt, np.zeros((gt.shape[0],3))))

metric.add(preds,gt)

metric.value(iou_thresholds=0.5)

In [None]:
# The import must name what it imports; MetricBuilder is the only name this
# cell uses from the package.
from mean_average_precision import MetricBuilder

# Pred
# [xmin, ymin, xmax, ymax, class_id, confidence]

# Every detection gets class 0 and confidence 1.
preds = np.hstack((boxes_nms, np.zeros((len(boxes_nms),1))))
preds = np.hstack((preds, np.ones((len(boxes_nms),1))))

# Gt
# [xmin, ymin, xmax, ymax, class_id, difficult, crowd]
gt = np.hstack((text_boxes.reshape(-1,4), np.zeros((len(text_boxes),3))))

# create metric_fn
metric_fn = MetricBuilder.build_evaluation_metric("map_2d", async_mode=False, num_classes=1)

metric_fn.add(preds,gt)

out = metric_fn.value(iou_thresholds=0.5)

In [None]:
# Recall on the x axis, precision on the y axis, for the entry at out[0.5][0].
plt.plot(out[0.5][0]["recall"],out[0.5][0]["precision"])

In [None]:
from mapcalc import calculate_map, calculate_map_range

# Same evaluation with mapcalc: every box gets label 1.
# Note: the prediction scores are random and unseeded, so results can differ
# between runs.
ground_truth={"boxes":text_boxes.reshape(-1,4),"labels":np.ones(len(text_boxes))}
predictions={"boxes":boxes_nms,"labels":np.ones(len(boxes_nms)), "scores":np.random.rand(len(boxes_nms))}

# Only the value of the last expression is displayed by the notebook.
calculate_map_range(ground_truth, predictions, 0.5,0.95,0.05)
calculate_map(ground_truth, predictions, 0.5)

In [None]:
# Sanity check: is column 1 greater than column 3 for any NMS box?
np.any(boxes_nms[:,1]>boxes_nms[:,3])

In [None]:
# Order the two points of every box so the smaller x and y come first.
sort_boxes = np.sort(text, axis=1)

sort_boxes = sort_boxes.reshape(-1,4)

# text_boxes is (N, 2, 2); flatten it to (N, 4) so it can sit side by side
# with sort_boxes (np.hstack needs arrays of matching dimensionality).
print(np.hstack((text_boxes.reshape(-1,4),sort_boxes))[:10])

In [None]:
# Display the current value of `text`.
text

### Read text

In [None]:
fig, axs = plt.subplots(1,2, sharex=True, sharey=True)
# plt.imshow(thresh)
draw1 = cv2.cvtColor(thresh,cv2.COLOR_GRAY2RGB)
draw2 = cv2.cvtColor(thresh,cv2.COLOR_GRAY2RGB)


# Paint the NMS-filtered text boxes: filled black on draw1, outlined on draw2.
# One call per canvas is enough because every box is passed in a single call;
# `offs_boxes` is deliberately left untouched so the cells that stack it can
# be re-run.
draw_rects(draw1, boxes_nms.reshape(-1,2,2), (0,0,0), thickness=-1)
draw_rects(draw2, boxes_nms.reshape(-1,2,2), (255,0,0), thickness=5)

axs[0].imshow(draw1)
axs[1].imshow(draw2)

In [None]:
import pytesseract
draw2 = cv2.cvtColor(thresh,cv2.COLOR_GRAY2RGB)

for i, r in enumerate(boxes_nms.reshape(-1,2,2)):
    crop = im[rect_to_slice(r, margin=5)]

    h, w = crop.shape[:2]

    # Crops that are clearly taller than wide are rotated by 90 degrees
    # before OCR (presumably vertical text — confirm).
    tall = h > 1.5*w
    if tall:
        crop = cv2.rotate(crop, cv2.ROTATE_90_CLOCKWISE)


    # Note: this rebinds the global `text` to the OCR string.
    text = pytesseract.image_to_string(crop, config="--oem 3 --psm 7")
    
    # OCR can return an empty string; treat that as 0% alphabetic instead of
    # dividing by zero. `alpha_count` is not defined in this notebook
    # (presumably from `common`).
    alpha_percent = alpha_count(text) / len(text) if text else 0.0

    # Red outline below 50% alphabetic characters, green otherwise.
    if alpha_percent < 0.5 :
        draw_rects(draw2, [r,], (255,0,0), thickness=5)
    else:
        draw_rects(draw2, [r,], (0,255,0), thickness=5)


plt.imshow(draw2)

In [None]:
# plt.imshow(t[0,0])


# Show window (0, 1) together with the detections stored in outputs[1].
draw = cv2.cvtColor(t[0,1],cv2.COLOR_GRAY2RGB)
boxes= outputs[1]["boxes"]
# `np.int` was removed from NumPy; cv2.drawContours requires int32 points.
draw = cv2.drawContours(draw,boxes[...,np.newaxis,:].astype(np.int32),-1, (255,0,0), thickness=5)

plt.imshow(draw)

### Detection

In [None]:
fig, axs = plt.subplots(1,2, sharex=True, sharey=True)

# Start from `draw1` (the binary image with text boxes filled in black).
# Note: this rebinds `t` and `thresh`; earlier cells that use `t` as the
# window view must be re-run before they are used again.
gray = np.mean(draw1,axis=-1).astype(np.uint8)
t, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
skel = skimage.morphology.skeletonize(thresh//255, method="lee")
# skel = thresh

# 35x35 kernel for the black-hat; 5x5 kernel for the open/close clean-up.
kern = cv2.getStructuringElement(cv2.MORPH_RECT, ksize=(35,35))
closing_kern = cv2.getStructuringElement(cv2.MORPH_RECT, ksize=(5,5))
# kern = cv2.getStructuringElement(cv2.MORPH_RECT, ksize=(5,5))

blackhat = cv2.morphologyEx(skel, cv2.MORPH_BLACKHAT, kern)

blackhat = cv2.morphologyEx(blackhat, cv2.MORPH_OPEN, closing_kern)
blackhat = cv2.morphologyEx(blackhat, cv2.MORPH_CLOSE, closing_kern, iterations=3)
# tophat = cv2.morphologyEx(skel, cv2.MORPH_TOPHAT, kern)

# Overlay the ground-truth symbol boxes on the result for comparison.
draw = draw_symbols(cv2.cvtColor(blackhat*255, cv2.COLOR_GRAY2RGB))

axs[0].imshow(skel)
axs[1].imshow(draw)

In [None]:
fig, axs = plt.subplots(1,2, sharex=True, sharey=True)

# Same pipeline as the previous cell, with an extra dilate (x2) / erode (x1)
# step at the end.
gray = np.mean(draw1,axis=-1).astype(np.uint8)
t, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
skel = skimage.morphology.skeletonize(thresh//255, method="lee")
# skel = thresh

kern = cv2.getStructuringElement(cv2.MORPH_RECT, ksize=(35,35))
closing_kern = cv2.getStructuringElement(cv2.MORPH_RECT, ksize=(5,5))
# kern = cv2.getStructuringElement(cv2.MORPH_RECT, ksize=(5,5))

blackhat = cv2.morphologyEx(skel, cv2.MORPH_BLACKHAT, kern)

blackhat = cv2.morphologyEx(blackhat, cv2.MORPH_OPEN, closing_kern)
blackhat = cv2.morphologyEx(blackhat, cv2.MORPH_CLOSE, closing_kern, iterations=3)

# Two dilations followed by one erosion: a net growth of the regions.
blackhat = cv2.morphologyEx(blackhat, cv2.MORPH_DILATE, closing_kern, iterations=2)
blackhat = cv2.morphologyEx(blackhat, cv2.MORPH_ERODE, closing_kern, iterations=1)
# tophat = cv2.morphologyEx(skel, cv2.MORPH_TOPHAT, kern)

draw = draw_symbols(cv2.cvtColor(blackhat*255, cv2.COLOR_GRAY2RGB))

axs[0].imshow(skel)
axs[1].imshow(draw)

In [None]:
# im 
contours, hierarchy = cv2.findContours(blackhat*255, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

# Boxes [x, y, x+w, y+h] of every contour whose area exceeds 10.
symbol_boxes = []
for c in contours:
    if cv2.contourArea(c) > 10:
        x,y,w,h =cv2.boundingRect(c)
        symbol_boxes.append([x,y,x+w,y+h])

# Note: np.stack raises if no contour passed the area filter.
draw = im.copy()
draw_rects(draw, np.stack(symbol_boxes).reshape(-1,2,2), thickness=8)
plt.imshow(draw)

In [None]:
def detect_symbols(image, hat_ksize=35, clean_ksize=5, min_area=10):
    """Detect candidate symbol regions and return their bounding boxes.

    Pipeline: Otsu threshold -> skeleton -> black-hat -> open / close /
    dilate / erode clean-up -> bounding boxes of the remaining contours.

    Parameters
    ----------
    image : 2-D grayscale array, or 3-D array whose channels are averaged.
    hat_ksize : side of the square kernel used for the black-hat.
    clean_ksize : side of the square kernel used for the clean-up steps.
    min_area : contours with an area not above this value are ignored.

    Returns
    -------
    Integer array of shape (K, 4) with rows ``[x, y, x+w, y+h]``; shape
    ``(0, 4)`` when no contour passes the area filter.
    """
    gray = np.mean(image, axis=-1).astype(np.uint8) if image.ndim == 3 else image

    t, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

    # Foreground is smaller than 50% of image
    if np.count_nonzero(thresh) > thresh.size/2:
        thresh = 255-thresh

    skel = skimage.morphology.skeletonize(thresh//255, method="lee")
    # The skeleton's dtype and value range may differ between scikit-image
    # releases (bool, or uint8 with 0/255); normalise to a 0/1 uint8 mask so
    # the OpenCV calls and the `*255` scaling below behave the same everywhere.
    skel = (skel > 0).astype(np.uint8)

    kern = cv2.getStructuringElement(cv2.MORPH_RECT, ksize=(hat_ksize, hat_ksize))
    closing_kern = cv2.getStructuringElement(cv2.MORPH_RECT, ksize=(clean_ksize, clean_ksize))

    blackhat = cv2.morphologyEx(skel, cv2.MORPH_BLACKHAT, kern)

    blackhat = cv2.morphologyEx(blackhat, cv2.MORPH_OPEN, closing_kern)
    blackhat = cv2.morphologyEx(blackhat, cv2.MORPH_CLOSE, closing_kern, iterations=3)

    blackhat = cv2.morphologyEx(blackhat, cv2.MORPH_DILATE, closing_kern, iterations=2)
    blackhat = cv2.morphologyEx(blackhat, cv2.MORPH_ERODE, closing_kern, iterations=1)

    contours, hierarchy = cv2.findContours(blackhat*255, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    symbol_boxes = []
    for c in contours:
        if cv2.contourArea(c) > min_area:
            x, y, w, h = cv2.boundingRect(c)
            symbol_boxes.append([x, y, x+w, y+h])

    # np.stack cannot stack an empty list; return an empty (0, 4) array instead.
    if not symbol_boxes:
        return np.empty((0, 4), dtype=int)
    return np.stack(symbol_boxes)

draw = im.copy()

# Detect on `draw1` (text boxes filled in black) and outline the result on a
# copy of the cropped drawing.
symbol_boxes = detect_symbols(draw1)
draw_rects(draw, np.stack(symbol_boxes).reshape(-1,2,2), thickness=8)
plt.imshow(draw)

In [None]:
import mahotas 

def get_largest_contour(im):
    """Return the contour of ``im`` with the largest ``cv2.contourArea``.

    Contours are found with ``RETR_TREE`` / ``CHAIN_APPROX_SIMPLE``. Among
    contours of equal area the one found last is returned.
    """
    found, _hierarchy = cv2.findContours(im, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    by_area = sorted(found, key=cv2.contourArea)
    return by_area[-1]

def zernike_adaptive_centroid(image, degree=8):
    """Zernike moments of ``image`` with a radius taken from its largest contour.

    The radius is that of the minimum enclosing circle of the largest contour;
    ``degree`` is forwarded to ``mahotas.features.zernike_moments``.

    NOTE(review): the enclosing circle's centre is computed but not passed on
    to ``zernike_moments`` — confirm whether it was meant to be used.
    """
    outline = get_largest_contour(image)
    _centre, radius = cv2.minEnclosingCircle(outline)
    return mahotas.features.zernike_moments(image, radius, degree=degree)

In [None]:
# Crop every detected symbol box (margin 15) out of the binary image and
# describe each crop with Zernike moments.
crops = [ thresh[rect_to_slice(s.reshape(2,2), margin=15)] for s in symbol_boxes] 

features = [ zernike_adaptive_centroid(crop) for crop in crops]

print(np.stack(features))

In [None]:
import sklearn.cluster

# centroid, labels, _ = sklearn.cluster.k_means(np.stack(features), n_clusters=5)
# Cluster the descriptors with mean shift, using a bandwidth estimated from
# the descriptors themselves.
b = sklearn.cluster.estimate_bandwidth(np.stack(features))
centroid, labels = sklearn.cluster.mean_shift(np.stack(features), bandwidth=b)

In [None]:
# One figure per cluster, one panel per member crop.
for l in np.unique(labels):
    # squeeze=False always returns a 2-D axes array, so clusters with a single
    # member can be indexed too, without allocating a spare empty panel.
    fig, ax = plt.subplots(1, np.count_nonzero(labels==l), squeeze=False)
    ax = ax.ravel()
    i=0
    for label, crop in zip(labels,crops):
        if label==l:
            ax[i].imshow(crop)
            i+=1