In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import skimage as ski
import skimage.io
import cv2 as cv
import math
import os
import random
import matplotlib
#import umap
#import hdbscan

from skimage.io import imshow
from math import pi, ceil, floor
from numpy import sqrt
from pprint import pprint
from random import randint
from itertools import combinations 
from tqdm import tqdm_notebook as tqdm
from sklearn.metrics.pairwise import cosine_similarity
from glob import glob

import scipy.ndimage as ndi
import skimage.draw as draw
from skimage.util import invert
from skimage.feature import canny, peak_local_max
from skimage.filters import rank, sobel, threshold_otsu, threshold_local, threshold_minimum, gaussian
from skimage.morphology import disk, erosion, dilation, watershed, remove_small_holes, remove_small_objects, convex_hull_object, area_opening
from skimage.transform import hough_circle, hough_circle_peaks
from skimage.exposure import adjust_gamma, rescale_intensity, equalize_hist
from skimage.io import imshow as skimshow

from cvtools import *
from train import *

In [None]:
# HSV colour ranges whose pixel ratios are used as features by color_ratios().
# Every entry is a flat list of (h, s, v) bounds read as (lower, upper) pairs.
# Bounds are written with hue up to 360 and s/v mostly within 0-1, then passed
# through hsv2cv() below (presumably rescaling to OpenCV's HSV units — confirm
# in cvtools).
nul = 1 / 255
xs = [
    [(0  , 0.0, nul), (360, 0.0, 0.15)], # black
    [(0  , 0.0, 0.7), (360, 0.15, 1.0)], # white
    [(31 , 0.1, 0.1), (49 , 1.0, 1.0)], # orange
    [(50 , 0.1, 0.1), (70 , 1.0, 1.0)], # yellow
    [(71 , 0.1, 0.1), (140, 1.0, 1.0)], # green
    [(141, 0.1, 0.1), (160, 1.0, 1.0)], # green
    [(161, 0.1, 0.1), (215, 1.0, 1.0)], # blue
    [(215, 0.1, 0.1), (290, 1.0, 1.0)], # magenta
    [(331, 0.2, 0.2), (360, 1.0, 1.0), (0, 0.2, 0.2), (30, 1.0, 1.0)], # red: two pairs because the hue wraps around 0
    # NOTE(review): the upper v of 5.0 is outside the 0-1 range used by every
    # other entry — possibly a typo for 0.5. Changing it alters the features,
    # so any trained model would need retraining.
    [(310, 0.5, 0.1), (360, 1.0, 5.0)], # dark red
]

# Append a generic hue histogram: one range per `step` degrees, `window` degrees wide.
step = 10
window = 10
for h in range(0, 360, step):
    xs.append([(h, 0, 0.001), (h+window, 1.0, 1.0)])
    
# Convert every bound with hsv2cv and publish the result under its final name.
xs = [[hsv2cv(y) for y in ys] for ys in xs]
color_ranges = xs

def color_ratios(bgr, area):
    """Return the share of ``area`` covered by each entry of ``color_ranges``.

    Parameters
    ----------
    bgr : image
        BGR image; it is smoothed with a small bilateral filter and converted
        to HSV before counting.
    area : number
        Denominator for the ratios (the caller passes the pixel count).

    Returns
    -------
    list
        One ratio per entry of ``color_ranges``, in the same order. Entries
        made of several (lower, upper) pairs contribute the sum of all pairs.

    Notes
    -----
    Earlier revisions indexed the bounds as ``x[j], x[j+1]``, which for a
    multi-pair entry (the wrapped red range) compared ``(hi0, lo1)`` instead of
    ``(lo1, hi1)`` and so never counted the second interval. Models trained on
    the old values should be retrained.
    """
    k = 3
    bgr = cv.bilateralFilter(bgr, k, k*2, k/2)
    hsv = cv.cvtColor(bgr, cv.COLOR_BGR2HSV)

    ratios = []
    for bounds in color_ranges:
        # bounds is flat: [lo0, hi0, lo1, hi1, ...]; walk it pair by pair.
        lowers, uppers = bounds[0::2], bounds[1::2]
        hits = sum(count_in_range(hsv, lo, hi) for lo, hi in zip(lowers, uppers))
        ratios.append(hits / area)
    return ratios
    
def dobble_features(cutout):
    """Build the flat feature vector for a single symbol cutout.

    The cutout is normalised to 40x40 with ``resizeAndPad``. The returned 1-D
    array is the concatenation, in this order, of: the absolute Hu moments of
    the grayscale patch, the ``color_ratios`` of the patch, and the flattened
    pixels of the patch.
    """
    side = 40
    patch = resizeAndPad(cutout, (side, side))
    ratios = color_ratios(patch, side**2)
    moments = cv.moments(bgr2gray(patch))
    # Only the magnitude of each Hu moment is kept.
    hu_abs = np.abs(cv.HuMoments(moments).reshape(-1))
    return np.hstack((hu_abs, ratios, patch.reshape(-1)))

In [None]:
def clean_circle_mask(x, fill=True, erode=8, dilate=5):
    """Tidy up a card mask: optional hole filling, then erosions, then dilations.

    Parameters
    ----------
    x : array
        Input mask.
    fill : bool
        When true, holes are closed first with ``ndi.binary_fill_holes``.
    erode, dilate : int
        How many times ``erosion`` and then ``dilation`` are applied.

    Returns
    -------
    array
        The processed mask (``x`` itself when nothing is applied).
    """
    mask = ndi.binary_fill_holes(x) if fill else x
    for _ in range(erode):
        mask = erosion(mask)
    for _ in range(dilate):
        mask = dilation(mask)
    return mask

def clean_dobble_mask(x, fill=True, remove=True, dilate=1, erode=1):
    """Tidy up a symbol mask and label its connected components.

    The mask is dilated ``dilate`` times, eroded ``erode`` times, optionally
    hole-filled (``fill``) and optionally stripped of small objects
    (``remove``). The result is labelled with ``ndi.label`` and the label image
    is returned after conversion with ``ski2cv``.
    """
    mask = x
    for _ in range(dilate):
        mask = dilation(mask)
    for _ in range(erode):
        mask = erosion(mask)
    if fill:
        mask = ndi.binary_fill_holes(mask)
    if remove:
        mask = remove_small_objects(mask)
    # Only the label image is needed; the component count is discarded.
    labels, _ = ndi.label(mask)
    return ski2cv(labels)

In [None]:
def filter_circles(mask, contours, min_roundness=0.6, min_radius=30, min_axis_ratio=0.7):
    """Keep the contours that look like a (roughly circular) card.

    Parameters
    ----------
    mask : array
        Only its shape is used, to size the output mask.
    contours : sequence
        Contours as returned by ``cv.findContours``.
    min_roundness, min_radius : number
        Lower limits for ``contour_props(c)['roundness']`` and ``['radius']``.
    min_axis_ratio : number
        Lower limit for short axis / long axis of the fitted ellipse.

    Returns
    -------
    out : uint8 array
        Mask with every accepted ellipse drawn filled in ``WHITE``.
    obj : list of dict
        One ``dict(contour=..., props=..., ellipse=...)`` per accepted contour.
    """
    out = np.zeros_like(mask, dtype=np.uint8)
    obj = []
    for c in contours:
        # cv.fitEllipse needs at least five points
        if len(c) < 5:
            continue
        p = contour_props(c)
        if p is None:
            continue
        # not circly enough or too small
        if p['roundness'] < min_roundness or p['radius'] < min_radius:
            continue
        ellipse = cv.fitEllipse(c)
        # Compare short/long regardless of the order the axes come back in.
        short_ax, long_ax = sorted(ellipse[1])
        # Degenerate fit (zero or NaN axis) or too squishy; written with `not`
        # so that NaN comparisons also reject the contour.
        if not long_ax > 0 or not short_ax / long_ax >= min_axis_ratio:
            continue
        obj.append(dict(contour=c, props=p, ellipse=ellipse))
        cv.ellipse(out, ellipse, WHITE, -1)
    return out, obj

def filter_dobbles(img, contours, circles):
    """Keep the contours that look like a symbol lying on one of the cards.

    Parameters
    ----------
    img : image
        Image the cutouts are taken from; also sizes the output mask.
    contours : sequence
        Candidate symbol contours.
    circles : list of dict
        Cards as returned by ``filter_circles``.

    Returns
    -------
    out : uint8 array
        Mask with every accepted contour drawn filled in ``WHITE``.
    obj : list of dict
        One ``dict(contour=..., cutout=..., props=..., circle=...)`` per
        accepted contour, where ``circle`` indexes into ``circles``.
    """
    out = np.zeros_like(img, dtype=np.uint8)
    obj = []
    # no circles on image: nothing a symbol could belong to
    if not circles:
        return out, obj

    for x in contours:
        p = contour_props(x)
        if p is None:
            continue
        xo = p['origin']
        # closest circle (first one wins on ties)
        ci = min(range(len(circles)), key=lambda k: dist(xo, circles[k]['props']['origin']))
        co = circles[ci]['props']['origin']
        cr = circles[ci]['props']['radius']
        # TODO: check if inside ellipse, not circle
        # Cheap geometric rejection first, so no cutout is built for outsiders.
        if dist(xo, co) > cr:
            continue
        cutout = trimmed_cutout(contour_mask(img, x), x)
        # Drop blobs that are almost entirely low-saturation, bright pixels.
        white = count_in_range(bgr2hsv(cutout), (0, 0, 0.5*255), (360/2, 0.2*255, 255)) / p['area']
        if white > 0.98:
            continue
        obj.append(dict(contour=x, cutout=cutout, props=p, circle=ci))
        cv.drawContours(out, [x], 0, WHITE, -1)
    return out, obj

In [None]:
def extract_metadata(X):
    """Detect cards (circles) and the symbols on them (dobbles) in one image.

    Parameters
    ----------
    X : str or image
        A path, which is read with ``cvimread`` and passed through
        ``shrink(..., side=400)``, or an already loaded image, used as is.

    Returns
    -------
    circles : list of dict
        Second return value of ``filter_circles`` for the card mask.
    dobbles : list of dict
        Second return value of ``filter_dobbles`` for the symbol mask.
    steps : list
        The twelve intermediate images in pipeline order (source, four card
        stages, final card mask, five symbol stages, final symbol mask).
    """
    img = src = shrink(cvimread(X), side=400) if isinstance(X, str) else X
    #img = cv.medianBlur(img, 5)
    #img = adjust_gamma(img, 2)
    # Placeholder only: cs1 is reassigned by the bilateral filter just below.
    cs1 = img
    
    # --- card (circle) detection ---
    # Smooth, then equalise only the first LAB channel before thresholding.
    k = 10
    cs1 = cv.bilateralFilter(img, k, k*2, k/2)
    lab = bgr2lab(cs1)
    #lab[:,:,0] = cv.equalizeHist(lab[:,:,0])
    lab[:,:,0] = equalize_adapthist(lab[:,:,0], clip_limit=1, tile_size=10)
    cs1 = lab2bgr(lab)
    
    def basic_circle_mask(input):
        """Return the HSV value channel of `input`, zeroed outside four whitish/grayish HSV ranges."""
        hsv = bgr2hsv(input)
        msk1 = cv.inRange(hsv, (0,     0, 0.8*255), (360/2, 0.03*255, 255)) # very bright whites
        msk2 = cv.inRange(hsv, (0,     0, 0.7*255), (360/2, 0.05*255, 255)) # dark grays
        msk3 = cv.inRange(hsv, (0,     0, 0.4*255), (360/2, 0.07*255, 0.7*255)) # light grays
        msk4 = cv.inRange(hsv, (160/2, 0, 0.3*255), (260/2, 0.3 *255, 255)) # blue whites
        # Union of the four masks; only zero / non-zero matters after `> 0`.
        msk = msk1 + msk2 + msk3 + msk4
        msk = ski2cv(msk > 0)
        hsv = cv.copyTo(hsv, msk)
        gray = hsv[:,:,2]
        return gray
    
    cs2 = circle_mask = cv.adaptiveThreshold(bgr2gray(cs1), 255, cv.ADAPTIVE_THRESH_GAUSSIAN_C, cv.THRESH_BINARY, 61, 0)
    # The colour mask is computed on the unfiltered `img`, not on cs1.
    cs3 = basic_circle_mask(img)
    # NOTE(review): element-wise product used as a mask AND. If both operands
    # are uint8 (as the cv calls suggest) the product wraps modulo 256, so only
    # zero / non-zero is meaningful downstream — consider cv.bitwise_and.
    cs3 = circle_mask = circle_mask * cs3
    cs4 = circle_mask = clean_circle_mask(circle_mask, fill=True, erode=15, dilate=8)
    circle_cont, _ = cv.findContours(ski2cv(circle_mask), cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)
    circle_mask, circles = filter_circles(circle_mask, circle_cont)

    # --- symbol (dobble) detection, restricted to the accepted cards ---
    k = 2
    img = cv.bilateralFilter(img, k, k*2, k*2)
    ds1 = img = equalize_adapthist_lab(img, clip_limit=3, tile_size=4)
    ds2 = gray = bgr2gray(img)
    
    # Inverted threshold: pixels darker than their neighbourhood become foreground.
    ds3 = dobble_mask = cv.adaptiveThreshold(gray, 255, cv.ADAPTIVE_THRESH_GAUSSIAN_C, cv.THRESH_BINARY_INV, 51, 6)
    # NOTE(review): same product-as-AND as above; the same wrap-around caveat applies.
    ds4 = dobble_mask = dobble_mask * circle_mask
    ds5 = dobble_mask = clean_dobble_mask(dobble_mask, dilate=1, erode=1)
    dobble_cont, _ = cv.findContours(dobble_mask, cv.RETR_TREE, cv.CHAIN_APPROX_SIMPLE)
    # Cutouts are taken from the filtered and equalised `img`, not from `src`.
    dobble_mask, dobbles = filter_dobbles(img, dobble_cont, circles)
    
    steps = [src, cs1, cs2, cs3, cs4, circle_mask, ds1, ds2, ds3, ds4, ds5, dobble_mask]
    return circles, dobbles, steps

# Run the pipeline on one sample photo and show every intermediate stage.
# `circles`, `dobbles` and `steps` are reused by the cells below.
circles, dobbles, steps = extract_metadata('res/all/c23.jpg')
print(f'found {len(circles)} circles, {len(dobbles)} dobbles')
# Only the first two stages go through cv2ski before display (presumably a
# BGR -> RGB conversion — confirm in cvtools); the rest are shown as returned.
imgs = [cv2ski(steps[0]), cv2ski(steps[1])] + steps[2:]
imshow_grid(imgs, 5, 3, figsize=16, pad=.05, cmap='gray')

In [None]:
def build_hierarchy(circles, dobbles):
    """Group dobble indices by the circle each dobble belongs to.

    Returns a dict with one key per index of ``circles`` (so circles without
    any dobble map to an empty list); each value lists, in order, the indices
    of the ``dobbles`` whose ``'circle'`` entry equals that key.
    """
    members = {circle_idx: [] for circle_idx, _ in enumerate(circles)}
    for dobble_idx, dobble in enumerate(dobbles):
        members[dobble['circle']].append(dobble_idx)
    return members

# Classify every detected symbol, annotate the source image and connect
# symbols that carry the same label on two different cards.
clas = load_model('res/model.pkl')
inp = np.array([dobble_features(x['cutout']) for x in dobbles])
# An image without detections yields an empty 1-D array, which the
# classifier's predict() may reject (TODO confirm); skip the call then.
cat = clas.predict(inp) if len(dobbles) else []

out = steps[0].copy()
# Outline each card in its own colour.
for i, x in enumerate(circles):
    cv.ellipse(out, x['ellipse'], RCOLORS[i], 1)

for i, x in enumerate(dobbles):
    ox, oy = x['props']['origin']
    ci = x['circle']
    # Symbols share the colour of the card they sit on.
    color = RCOLORS[ci]

    # Keep the part after the first '-' of the predicted class name.
    label = cat[i].split('-')[1]
    x['label'] = label

    #cv.drawContours(out, [x['contour']], 0, color, 1)
    cv.rectangle(out, (ox-1, oy-1), (ox+1, oy+1), color, 2)
    putText2(out, label, (ox, oy), 0.3)

hier = build_hierarchy(circles, dobbles)
# For every pair of cards, link the symbols whose labels agree.
for c1, c2 in combinations(hier, 2):
    for i in hier[c1]:
        for j in hier[c2]:
            u, v = dobbles[i], dobbles[j]
            if u['label'] == v['label']:
                cv.line(out, u['props']['origin'], v['props']['origin'], CYAN, 2)

imshow_fig(cv2ski(out))

In [None]:
def compare_features(dobbles, i, j):
    """Print a side-by-side table of the features of two dobbles.

    Parameters
    ----------
    dobbles : sequence
        Dobble dicts (their ``'cutout'`` is used) or bare cutout images.
    i, j : int
        Indices of the two dobbles to compare.

    The table lists the colour ratios first and the Hu moments second, each
    row showing both values and their absolute difference.
    """
    def split_features(entry):
        # dobble_features expects the cutout image, not the whole dobble dict.
        cutout = entry['cutout'] if isinstance(entry, dict) else entry
        feats = dobble_features(cutout)
        # Layout of the flat vector: [Hu moments | colour ratios | raw pixels].
        n_hu = 7  # cv.HuMoments always yields seven invariants
        n_cr = len(color_ranges)
        return feats[n_hu:n_hu + n_cr], feats[:n_hu]

    print(f'name\tf({i})\tf({j})\tdiff')
    cr_i, hu_i = split_features(dobbles[i])
    cr_j, hu_j = split_features(dobbles[j])
    #names = 'rx,rax,blk,wht,orng1,ylw,blu1,blu2,mag1,mag2,red,dred'
    for k, (a, b) in enumerate(zip(cr_i, cr_j)):
        print(f'cr {k}\t{a:.4f}\t{b:.4f}\t{abs(a-b):.4f}')
    for k, (a, b) in enumerate(zip(hu_i, hu_j)):
        print(f'hu {k}\t{a:.4f}\t{b:.4f}\t{abs(a-b):.4f}')
        
# Indices 19 and 21 refer to the `dobbles` list produced for the sample image
# above; indexing raises IndexError when fewer dobbles were detected.
compare_features(dobbles, 19, 21)

In [None]:
def load_cutouts(base_path):
    """Load labelled cutouts stored as ``<base_path>/<label>/<name>.jpg``.

    Parameters
    ----------
    base_path : str
        Directory whose sub-directories are named after the labels.

    Returns
    -------
    pandas.DataFrame
        Columns ``label`` (name of the parent directory), ``features`` (first
        ``46+7`` values of ``dobble_features``) and ``cutout`` (the image as
        read). Files OpenCV cannot read are skipped. The columns are present
        even when no file was loaded.
    """
    records = []
    paths = glob(os.path.join(base_path, '*/*.jpg'))
    for path in tqdm(paths):
        cutout = cv.imread(path)
        # cv.imread signals an unreadable file by returning None, not by raising.
        if cutout is None:
            continue
        # Parent directory name, independent of the platform's path separator.
        label = os.path.basename(os.path.dirname(path))
        # 7 Hu moments + 46 colour ratios; the raw pixels that follow are dropped.
        features = dobble_features(cutout)[:46+7]
        records.append(dict(label=label, features=features, cutout=cutout))
    return pd.DataFrame(records, columns=['label', 'features', 'cutout'])
        
def make_cutouts(base_path):
    """Extract every symbol cutout from the ``*.jpg`` images in ``base_path``.

    Each image is run through ``extract_metadata``; for every dobble found a
    row is produced with ``source`` (the image path), ``features`` (first
    ``46+7`` values of ``dobble_features``) and ``cutout`` (the cutout resized
    to 40x40 with ``resizeAndPad``). The rows are returned as a DataFrame.
    """
    rows = []
    for path in tqdm(glob(os.path.join(base_path, '*.jpg'))):
        # Only the dobbles are needed here; circles and debug steps are ignored.
        _, found, _ = extract_metadata(path)
        for dobble in found:
            raw = dobble['cutout']
            resized = resizeAndPad(raw, (40, 40))
            feats = dobble_features(raw)[:46+7]
            rows.append(dict(source=path, features=feats, cutout=resized))
    return pd.DataFrame(rows)

def plot_clusters(df):
    """Show one image grid per label, with the panels sorted by label.

    For every distinct value of ``df.label`` the matching ``df.cutout`` entries
    are combined with ``cutout_grid``; the panel layout comes from
    ``rect_size`` and each panel is titled with its label.
    """
    grids = sorted(
        ([name, cutout_grid(df[df.label == name].cutout)] for name in df.label.unique()),
        key=lambda pair: pair[0],
    )
    w, h = rect_size(len(grids))
    plt.figure(figsize=(13, 13), dpi=100)
    for (idx, _, _), (name, grid) in enumerate_grid(grids, (w, h)):
        # subplot indices are 1-based
        plt.subplot(w, h, idx + 1)
        plt.imshow(grid)
        plt.title(name)
        plt.axis('off')
    plt.show()

# Load the labelled cutouts; `df` is reused by the embedding and cluster cells below.
df = load_cutouts('res/sup')

In [None]:
import umap
# Stack the per-row feature vectors into one 2-D array for UMAP.
data = np.array([list(x) for x in df.features])
# 2-D embedding; random_state is fixed so re-runs give the same layout.
embedding = umap.UMAP(n_neighbors=5, n_components=2, min_dist=0, random_state=42).fit_transform(data)
# Adds the two embedding coordinates to `df` in place.
df['x'], df['y'] = embedding.T
#df['label'] = hdbscan.HDBSCAN(min_cluster_size=10).fit_predict(embedding)
plot_embedding(df, 'x', 'y', 'label', legend=False)

In [None]:
# One panel of cutouts per label found in `df`.
plot_clusters(df)