In [1]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Circle
from collections import Counter
from skimage import segmentation
from tqdm import tqdm

import scipy.ndimage as ndi
from scipy.ndimage.morphology import binary_fill_holes

from pptx import Presentation
from pptx.util import Pt, Emu

from PIL import Image
import io

from sklearn.cluster import KMeans

In [3]:
def emu2pt(val):
    '''
    Convert value from EMU to points
    '''
    return Emu(val).pt

def center_of_shape(shape):
    top, left = shape.top, shape.left
    width, height = shape.width, shape.height
    
    y = top + height / 2
    x = left + width / 2
    
    return emu2pt(x), emu2pt(y)

def convolve_max(img, shape, stride, threshold=0.8):
    new_img = img.copy()
    for j in np.arange(0, img.shape[1], stride[1]):
        for i in np.arange(0, img.shape[0], stride[0]):
            subimg = new_img[i:i+shape[0], j:j+shape[1]]
            freq = Counter(subimg.flatten())
            most_common, num = freq.most_common()[0]
            if num / subimg.size > threshold:
                new_img[i:i+shape[0], j:j+shape[1]] = most_common
    return new_img

## Loading the image and the location of true nuclie

In [4]:
img = plt.imread('original.jpg')
prs = Presentation('./KidneyAnnotated-GW.pptx')

img_shape = img.shape
slide_shape = emu2pt(prs.slide_width), emu2pt(prs.slide_height)
scale = img_shape[0] / slide_shape[0]

nuclie_slide = prs.slides[0]
shapes = nuclie_slide.shapes
true_locs = [center_of_shape(shape) for shape in nuclie_slide.shapes]
true_locs = [(x[0] * scale, x[1] * scale) for x in true_locs]

## Clustering pixels

In [7]:
kmeans = KMeans(n_clusters=4, n_jobs=100, random_state=0)
kmeans.fit(img.reshape((img.shape[0]**2, -1)))
clusters = kmeans.predict(img.reshape((-1, 3))).reshape((img.shape[0], img.shape[1]))

In [8]:
# Convolve a max pooling filter to denoise the clusters
convolved_img = convolve_max(clusters, shape=(4, 4), stride=(3, 3))

## Get the location of predicted nuclie

In [11]:
THRESHOLD = 200

freqs = Counter(convolved_img.flatten())
blue_num = freqs.most_common()[-1][0]
brown_num = freqs.most_common()[-2][0]

blue_masked = convolved_img.copy()
blue_masked[blue_masked != blue_num] = 0

label_objects, _ = ndi.label(blue_masked, )
sizes = np.bincount(label_objects.ravel())
mask_sizes = sizes > THRESHOLD
mask_sizes[0] = 0
blue_cleaned = mask_sizes[label_objects]

label_objects, _ = ndi.label(blue_cleaned)

predicted_locs = []
for i in range(1, np.max(label_objects)):
    locs = np.argwhere(label_objects == i)
    predicted_loc = locs.mean(0)
    if predicted_loc[1] < img.shape[1] / 2:
        predicted_locs.append(locs.mean(0))

## Plotting the predicted points

In [12]:
fig, ax = plt.subplots(1, figsize=(30, 30))
ax.imshow(img)

for x, y in true_locs:
    ax.add_patch(Circle((x, y), radius=3, fill=False, color='red'))
    
for x, y in predicted_locs:
    ax.add_patch(Circle((x, y), radius=3, fill=False, color='green'))
fig.savefig('image.png')
plt.close()

## Computing the accuracy measure

In [14]:
true_locs

[(1793.3254593175855, 2108.419783464567),
 (1852.287565616798, 2111.037729658793),
 (2344.6010498687665, 1138.990157480315),
 (2296.7344160104985, 1107.6469816272968),
 (2442.115813648294, 1115.9545603674542),
 (2477.0849737532812, 1131.6261482939633),
 (2349.1983267716537, 1171.7765748031497),
 (2403.1973425196857, 1100.1350065616798),
 (2416.4893372703414, 1213.6665026246721),
 (2463.771817585302, 1116.2782152230973),
 (2353.8518700787404, 1233.7874015748032),
 (2370.910597112861, 1202.4442257217847),
 (2437.9337270341207, 1238.73687664042),
 (2236.7088254593177, 1645.080216535433),
 (2347.902887139108, 1295.3828740157483),
 (2479.1405839895015, 2111.42749343832),
 (2221.536909448819, 1676.4516076115488),
 (2419.938812335958, 1565.6307414698163),
 (2265.6092519685044, 1559.5900590551182),
 (2600.5782480314965, 1445.4110892388453),
 (2555.6612532808404, 1523.1215551181103),
 (2401.4512795275596, 1770.5096784776904),
 (2239.9827755905512, 1530.8093832020998),
 (2555.921916010499, 1486.

In [38]:
errors = []
for pred in predicted_locs:
    distances = np.sqrt(np.sum((pred - true_locs) ** 2, axis=1))
    errors.append(distances.min())

mse = np.mean(np.power(errors, 2))
print('Mean Squared Error:', mse)

Mean Squared Error: 1983.17532355447
