In [2]:
import numpy as np 
import matplotlib.pyplot as plt 
from PIL import Image
import cv2
import pandas as pd 
import skimage
from pathlib import Path

## Dataset paths and loading

In [3]:
dbpath = Path("../DigitizePID_Dataset")

imagepath = dbpath /  "image_2"
imageformat = "jpg"
maskpath  = dbpath /  "mask"
maskformat = "png"

def im2mask(image):
    return maskpath / f"{image.stem}_mask.{maskformat}"
def mask2im(mask):
    return imagepath / f"{mask.stem}.{imageformat}"
def im2info(image):
    dfs = {  }
    for file in  (dbpath / image.stem).glob("*.npy"):
        data = np.load(str(file), allow_pickle=True)
        name = file.stem.split("_")[-1]
        dfs[name] = pd.DataFrame(data)
    return dfs

In [13]:
def load_sample(image_id):
    image = imagepath / f"{image_id}.{imageformat}"
    im = cv2.imread(str(image))

    # 375, 250
    # 5630, 4300
    roi = (slice(250, 4300), slice(375, 5630))
    im = im[roi]

    tr = np.array([375,250])

    data = im2info(image)

    # Translate lines
    lines = np.stack(data["lines"][1]).reshape(-1,2,2) - tr
    data["lines"][1] = list(lines.reshape(-1,4))
    data["lines"].columns=["name","box","code","type"]

    # Fix text boxes
    text_boxes = np.stack(data["words"].iloc[:,1]).reshape(-1,2,2) - tr
    # Sort X and Y coords inside each rect
    text_boxes = np.sort(text_boxes.reshape(-1,2,2),axis=1)
    data["words"][1] = list(text_boxes.reshape(-1,4))
    data["words"].columns=["name","box","code","type"]

    # Translate symbols
    symbols = np.stack(data["symbols"].iloc[:,1]).reshape(-1,2,2) - tr
    data["symbols"][1]=list(symbols.reshape(-1,4))
    data["symbols"].columns=["name","box","class"]

    return im, data

## Load sample image

In [14]:
im, data = load_sample(1)

## Drawing funcs

In [27]:
def draw_rects(img, tl_br_points, color=(255,0,0), **kwargs):

    if tl_br_points.ndim!=3:
        tl_br_points = tl_br_points.reshape(-1,2,2)

    for p1, p2 in tl_br_points:
        cv2.rectangle(img, p1, p2, color=color, **kwargs)

# Linetypes
def draw_pipelines(image, data=data):
    draw = image.copy()
    solid_lines = np.stack(data["lines"].query("type=='solid'")["box"])
    dashed_lines = np.stack(data["lines"].query("type=='dashed'")["box"])

    draw = cv2.drawContours(draw, solid_lines.reshape(-1,2,2), -1, (255, 255, 0), thickness=2)
    draw = cv2.drawContours(draw, dashed_lines.reshape(-1,2,2), -1, (0, 255, 255), thickness=2)
    return draw

def draw_symbols(image, data=data, color=None, thickness=2):
    draw = image.copy()
    for i, group in data["symbols"].groupby("class"):
        color_ = color or (np.random.rand(3)*255).astype(np.uint8)
        symbols = np.stack(group["box"])
        draw_rects(draw, symbols, color=[int(c) for c in color_], thickness=thickness)
    return draw

def draw_text_boxes(image, data=data, color=(255,0,255), thickness=1):
    draw = image.copy()
    text_boxes = np.stack(data["words"]["box"])
    draw_rects(draw, text_boxes, color=color, thickness=thickness)
    return draw

In [28]:
%matplotlib tk


# im = cv2.imread("test.jpg")
draw = im.copy()
draw = draw_pipelines(draw)
draw = draw_symbols(draw)
draw = draw_text_boxes(draw)
plt.imshow(draw)

<matplotlib.image.AxesImage at 0x7efd434582b0>

## Detection with blackhat

In [29]:
def detect_symbols(image):

    if image.ndim == 3:
        gray = np.mean(image,axis=-1).astype(np.uint8)
    else:
        gray=image

    t, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

    # Foreground is smaller than 50% of image
    if np.count_nonzero(thresh) > thresh.size/2:
        thresh = 255-thresh

    skel = skimage.morphology.skeletonize(thresh//255, method="lee")

    kern = cv2.getStructuringElement(cv2.MORPH_RECT, ksize=(35,35))
    closing_kern = cv2.getStructuringElement(cv2.MORPH_RECT, ksize=(5,5))

    blackhat = cv2.morphologyEx(skel, cv2.MORPH_BLACKHAT, kern)

    blackhat = cv2.morphologyEx(blackhat, cv2.MORPH_OPEN, closing_kern)
    blackhat = cv2.morphologyEx(blackhat, cv2.MORPH_CLOSE, closing_kern, iterations=3)

    blackhat = cv2.morphologyEx(blackhat, cv2.MORPH_DILATE, closing_kern, iterations=2)
    blackhat = cv2.morphologyEx(blackhat, cv2.MORPH_ERODE, closing_kern, iterations=1)

    contours, hierarchy = cv2.findContours(blackhat*255, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    symbol_boxes = []
    for c in contours:
        if cv2.contourArea(c) > 10:
            x,y,w,h =cv2.boundingRect(c)
            symbol_boxes.append([x,y,x+w,y+h])

    return np.stack(symbol_boxes)


## Detect without text removal

In [30]:
fig, axs = plt.subplots(1,2, sharex=True, sharey=True)

draw = im.copy()

symbol_boxes = detect_symbols(im)

draw_rects(draw, np.stack(symbol_boxes).reshape(-1,2,2), thickness=8)
axs[0].imshow(draw)

draw2 = draw_symbols(draw, thickness=8, color=(0,255,0))
axs[1].imshow(draw2)

<matplotlib.image.AxesImage at 0x7efd433f8e80>

### Features for each object

In [32]:
import mahotas 

def get_largest_contour(im):
    contours, hierarchy = cv2.findContours(im, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    cmax = sorted(contours, key=cv2.contourArea)[-1]
    return cmax

def zernike_adaptive_centroid(image, degree=8):
    c = get_largest_contour(image)
    (x,y),r = cv2.minEnclosingCircle(c)
    return  mahotas.features.zernike_moments(image, r, degree=degree)

def rect_to_slice(rect_pts, margin=0):
    """
    Convert cv-style rect to numpy-style slice
    """
    (x0, y0), (x1, y1) = rect_pts

    return (slice(y0-margin, y1+margin), slice(x0-margin, x1+margin))

In [33]:
gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
t, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
crops = [ thresh[rect_to_slice(s.reshape(2,2), margin=15)] for s in symbol_boxes] 

features = [ zernike_adaptive_centroid(crop) for crop in crops]

print(np.stack(features))

[[3.18309886e-01 1.08796273e-01 2.92339949e-02 ... 9.55055909e-02
  3.09650680e-02 3.90511023e-02]
 [3.18309886e-01 2.07845653e-04 2.25211249e-01 ... 8.30285786e-03
  6.96331941e-02 2.54502277e-02]
 [3.18309886e-01 3.02747019e-03 1.50428320e-01 ... 2.26116712e-02
  7.37736842e-03 5.94830356e-02]
 ...
 [3.18309886e-01 4.80393568e-05 2.76528224e-01 ... 7.11736848e-02
  5.83531401e-03 7.60186801e-02]
 [3.18309886e-01 1.02862569e-02 1.81708813e-01 ... 2.53316930e-02
  2.55687760e-02 4.00401239e-02]
 [3.18309886e-01 6.30387292e-04 2.06634989e-01 ... 4.01115882e-02
  1.08528253e-02 5.94822683e-02]]


#### Clustering

In [34]:
import sklearn.cluster

centroid, labels, _ = sklearn.cluster.k_means(np.stack(features), n_clusters=5)
# b = sklearn.cluster.estimate_bandwidth(np.stack(features))
# centroid, labels = sklearn.cluster.mean_shift(np.stack(features), bandwidth=b)

In [35]:
for l in np.unique(labels):
    fig,ax = plt.subplots(1, 1+np.count_nonzero(labels==l))
    i=0
    for label, crop in zip(labels,crops):
        if label==l:
            ax[i].imshow(crop)
            i+=1