In [None]:
from __future__ import division

import os
import glob
from collections import defaultdict, Counter
import colorsys

import imageio
import numpy as np
from sklearn.cluster import KMeans

import matplotlib as mpl
from matplotlib import pyplot as plt

from evaluation.bbox_iou import bbox_iou

In [None]:
def size(mask, bbox):
    """Return the number of non-zero mask entries inside a bounding box.

    `bbox` unpacks as (tly, tlx, bry, brx); the first and third values slice
    the rows of `mask`, the second and fourth slice its columns.
    """
    top, left, bottom, right = bbox
    region = mask[top:bottom, left:right]
    return np.count_nonzero(region)

In [None]:
def form_factor(bbox):
    """Return the width-to-height ratio of a (tly, tlx, bry, brx) bounding box."""
    top, left, bottom, right = bbox
    return (right - left) / (bottom - top)

In [None]:
def filling_ratio(mask, bbox):
    """Return the fraction of the bounding-box area covered by non-zero mask entries.

    The numerator is `size(mask, bbox)`; the denominator is the box area
    computed from the (tly, tlx, bry, brx) coordinates.
    """
    top, left, bottom, right = bbox
    box_area = (right - left) * (bottom - top)
    return size(mask, bbox) / box_area

In [None]:
def compute_num_overlap(gts, name=None):
    """Print the IoU of every pair of boxes in `gts` and count overlapping pairs.

    Parameters
    ----------
    gts : sequence
        Annotations whose first four items are the bounding-box coordinates
        (anything `float()` accepts).
    name : optional
        Label printed in front of each IoU (e.g. the image name). The previous
        implementation printed a global `img` here, which is not defined
        inside this function.

    Returns
    -------
    int
        Number of box pairs with a strictly positive IoU.
        NOTE(review): assumes `bbox_iou` returns a number -- confirm.
    """
    bboxes = [list(map(float, gt[:4])) for gt in gts]
    num_overlap = 0
    for i, bbox_a in enumerate(bboxes):
        # Only visit each unordered pair once.
        for bbox_b in bboxes[i + 1:]:
            iou = bbox_iou(bbox_a, bbox_b)
            if name is None:
                print(iou)
            else:
                print(name, iou)
            if iou > 0:
                num_overlap += 1
    return num_overlap

In [None]:
def dominant_colors(img, mask, bbox, k=7, n=2, random_state=0):
    """Return the `n` most populated k-means colour clusters of the masked patch.

    Parameters
    ----------
    img : array
        Image indexed as img[row, col, channel]; only the first three
        channels are used. Presumably RGB -- verify against caller.
    mask : array
        2-D array; non-zero entries select the pixels to cluster.
    bbox : sequence of 4 ints
        (tly, tlx, bry, brx) patch limits, used as row/column slices.
    k : int
        Number of k-means clusters (reduced when fewer pixels are available).
    n : int
        Number of cluster centres to return, most populated first.
    random_state : int or None
        Seed forwarded to KMeans so repeated runs give the same colours.

    Returns
    -------
    ndarray of uint8, shape (m, 3) with m <= n
        Cluster centres; empty (0, 3) when the mask selects no pixel.
    """
    tly, tlx, bry, brx = bbox

    img_patch = img[tly:bry, tlx:brx]
    mask_patch = mask[tly:bry, tlx:brx]
    # Boolean indexing yields one row per selected pixel.
    pixels = img_patch[..., :3][mask_patch != 0]

    if len(pixels) == 0:
        # Nothing to cluster: return an empty result instead of letting KMeans raise.
        return np.empty((0, 3), dtype=np.uint8)

    # KMeans refuses n_clusters > n_samples.
    n_clusters = min(k, len(pixels))
    clt = KMeans(n_clusters=n_clusters, random_state=random_state)
    labels = clt.fit_predict(pixels)

    most_common = [label for label, _ in Counter(labels.tolist()).most_common(n)]

    return clt.cluster_centers_[most_common].astype(np.uint8)

In [None]:
def show_patch(img, mask, bbox):
    """Draw the image patch inside `bbox` with unmasked pixels zeroed out.

    A new 6x1 figure is opened, the mask patch is clipped to at most 1 and
    repeated over three channels, and the product with the image patch is
    passed to `plt.imshow`.
    """
    plt.figure(figsize=(6, 1), frameon=False)
    top, left, bottom, right = bbox
    rows, cols = slice(top, bottom), slice(left, right)
    # Clip the mask to at most 1 so the product keeps the original pixel values.
    binary = np.minimum(mask[rows, cols], 1)
    binary_rgb = np.repeat(binary[:, :, np.newaxis], 3, axis=2)
    plt.imshow(img[rows, cols] * binary_rgb)

In [None]:
# Per-class statistics gathered over every annotated object of the training set.
class_frequency = defaultdict(int)
size_per_class = defaultdict(list)
form_factor_per_class = defaultdict(list)
filling_ratio_per_class = defaultdict(list)
rgb_colors_per_class = defaultdict(list)
for img_file in sorted(glob.glob('data/train/*.jpg')):
    name = os.path.splitext(os.path.basename(img_file))[0]
    mask_file = 'data/train/mask/mask.{}.png'.format(name)
    gt_file = 'data/train/gt/gt.{}.txt'.format(name)
    img = imageio.imread(img_file)
    mask = imageio.imread(mask_file)
    # Close the annotation file deterministically and skip blank lines,
    # which would otherwise fail on gt[4] below.
    with open(gt_file, 'r') as gt_handle:
        gts = [line.split(' ') for line in gt_handle.read().splitlines() if line.strip()]
    for gt in gts:
        # Round the float coordinates *before* converting to int; rounding
        # after int() truncation (as before) had no effect.
        bbox = np.round(list(map(float, gt[:4]))).astype(int)
        label = gt[4]

        class_frequency[label] += 1
        size_per_class[label].append(size(mask, bbox))
        form_factor_per_class[label].append(form_factor(bbox))
        filling_ratio_per_class[label].append(filling_ratio(mask, bbox))
        rgb_colors_per_class[label].extend(dominant_colors(img, mask, bbox))

In [None]:
# Bar chart: number of annotated objects for each class label.
labels = sorted(class_frequency)
heights = list(map(class_frequency.__getitem__, labels))
fig = plt.figure(figsize=(14, 7))
plt.bar(range(len(labels)), heights, tick_label=labels)
fig.suptitle('Frequency per class')
plt.show()

In [None]:
# Grid of histograms: mask size (non-zero pixel count) of the objects of each class.
labels = sorted(size_per_class.keys())
ncols = 2
nrows = max(1, int(np.ceil(len(labels) / ncols)))
# squeeze=False keeps `ax` two-dimensional even when there is a single row,
# so the ax[i, j] indexing below always works.
fig, ax = plt.subplots(nrows, ncols, figsize=(14, 7), squeeze=False)
for idx, label in enumerate(labels):
    i, j = idx//ncols, idx%ncols
    ax[i, j].hist(size_per_class[label])
    ax[i, j].set_title(label)
# Hide grid cells left without a class (e.g. odd number of classes).
for unused_ax in ax.ravel()[len(labels):]:
    unused_ax.set_visible(False)
fig.subplots_adjust(hspace=0.5)
fig.suptitle('Size per class (in pixels)')
plt.show()

In [None]:
# Grid of histograms: width/height ratio of the bounding boxes of each class.
labels = sorted(form_factor_per_class.keys())
ncols = 2
nrows = max(1, int(np.ceil(len(labels) / ncols)))
# squeeze=False keeps `ax` two-dimensional even when there is a single row,
# so the ax[i, j] indexing below always works.
fig, ax = plt.subplots(nrows, ncols, figsize=(14, 7), squeeze=False)
for idx, label in enumerate(labels):
    i, j = idx//ncols, idx%ncols
    ax[i, j].hist(form_factor_per_class[label])
    ax[i, j].set_title(label)
# Hide grid cells left without a class (e.g. odd number of classes).
for unused_ax in ax.ravel()[len(labels):]:
    unused_ax.set_visible(False)
fig.subplots_adjust(hspace=0.5)
fig.suptitle('Form factor per class')
plt.show()

In [None]:
# Grid of histograms: mask-area / bounding-box-area ratio of the objects of each class.
labels = sorted(filling_ratio_per_class.keys())
ncols = 2
nrows = max(1, int(np.ceil(len(labels) / ncols)))
# squeeze=False keeps `ax` two-dimensional even when there is a single row,
# so the ax[i, j] indexing below always works.
fig, ax = plt.subplots(nrows, ncols, figsize=(14, 7), frameon=False, squeeze=False)
for idx, label in enumerate(labels):
    i, j = idx//ncols, idx%ncols
    ax[i, j].hist(filling_ratio_per_class[label])
    ax[i, j].set_title(label)
# Hide grid cells left without a class (e.g. odd number of classes).
for unused_ax in ax.ravel()[len(labels):]:
    unused_ax.set_visible(False)
fig.subplots_adjust(hspace=0.5)
fig.suptitle('Filling ratio per class')
plt.show()

In [None]:
# Convert every collected RGB colour to HSV, keeping hue and saturation
# and forcing the value (brightness) component to its maximum of 1.
hsv_colors_per_class = defaultdict(list)
for label, class_rgbs in rgb_colors_per_class.items():
    for rgb in class_rgbs:
        hue, sat = colorsys.rgb_to_hsv(*(rgb/255))[:2]
        hsv = [hue, sat, 1]
        hsv_colors_per_class[label].append(hsv)

In [None]:
# For each class, cluster its HSV colours into (at most) two groups and draw
# the cluster centres as colour swatches, one subplot per class.
labels = sorted(hsv_colors_per_class.keys())
ncols = 2
nrows = max(1, int(np.ceil(len(labels) / ncols)))
# squeeze=False keeps `ax` two-dimensional even when there is a single row,
# so the ax[i, j] indexing below always works.
fig, ax = plt.subplots(nrows, ncols, figsize=(14, 7), squeeze=False)
for idx, label in enumerate(labels):
    hsv_colors = hsv_colors_per_class[label]
    # KMeans needs n_clusters <= n_samples; the fixed seed makes re-runs reproducible.
    clt = KMeans(n_clusters=min(2, len(hsv_colors)), random_state=0)
    clt.fit(hsv_colors)
    avg_colors = clt.cluster_centers_

    i, j = idx//ncols, idx%ncols
    rgb_colors = [colorsys.hsv_to_rgb(*hsv) for hsv in avg_colors]
    # One unit-wide rectangle per cluster centre, laid out left to right.
    for x, color in enumerate(rgb_colors):
        ax[i, j].add_patch(mpl.patches.Rectangle((x, 0), 1, 1, facecolor=color))
    ax[i, j].set_xlim((0, len(rgb_colors)))
    ax[i, j].set_ylim((0, 1))
    ax[i, j].set_xticks([])
    ax[i, j].set_yticks([])
    ax[i, j].set_aspect("equal")
    ax[i, j].set_title(label)
# Hide grid cells left without a class (e.g. odd number of classes).
for unused_ax in ax.ravel()[len(labels):]:
    unused_ax.set_visible(False)
fig.tight_layout()
plt.show()

In [None]:
# Cluster the HSV colours of all classes together, print the cluster centres
# and draw them as a row of colour swatches.
hsv_colors = np.concatenate(list(hsv_colors_per_class.values()))
# KMeans needs n_clusters <= n_samples; the fixed seed makes re-runs reproducible.
clt = KMeans(n_clusters=min(3, len(hsv_colors)), random_state=0)  # 3 colors
clt.fit(hsv_colors)
avg_colors = clt.cluster_centers_
# Plain loop instead of a list comprehension used only for its side effect.
for c in avg_colors:
    print('({:.4f}, {:.4f}, {:.4f})'.format(*c))


rgb_colors = [colorsys.hsv_to_rgb(*hsv) for hsv in avg_colors]
fig = plt.figure(figsize=(6, 1), frameon=False)
ax = fig.add_subplot(111)
# One unit-wide rectangle per cluster centre, laid out left to right.
for x, color in enumerate(rgb_colors):
    ax.add_patch(mpl.patches.Rectangle((x, 0), 1, 1, facecolor=color))
ax.set_xlim((0, len(rgb_colors)))
ax.set_ylim((0, 1))
ax.set_xticks([])
ax.set_yticks([])
ax.set_aspect('equal')
plt.show()