In [None]:
from filepath_util import *
import matplotlib.pyplot as plt
import numpy as np
import os
from tqdm import tqdm
import seaborn as sns
import pandas as pd

%matplotlib inline

In [None]:
# Images processed by this notebook, in processing order.
# Each image is expected to have a CSV of manual labels next to it with the
# same file stem (read_manual_labels swaps the extension for '.csv').
# NOTE(review): the file names suggest one image per biotin concentration plus
# a naive control - confirm against the dataset description.
IMAGE_FILEPATHS = [
    'dataset/1000/10uM_biotin_1000x__024.tif',
    'dataset/1000/5uM_biotin_1000x__006.tif',
    'dataset/1000/naive RBCs_1000x__061.tif',
    'dataset/1000/20uM_biotin_1000x__043.tif',
]

In [None]:
def read_manual_labels(image_filepath):
    """Load the manual-label table stored next to an image.

    The label file has the same path as the image with the extension replaced
    by ``.csv``. The first CSV row is used as the header and the first column
    as the index.

    Args:
        image_filepath: Path of the image whose labels should be loaded.

    Returns:
        pandas.DataFrame with the contents of the sibling CSV file.
    """
    stem, _extension = os.path.splitext(image_filepath)
    return pd.read_csv(stem + '.csv', header=0, index_col=0)

In [None]:
# Class id given to masks that cannot be tied to exactly one manual label.
AMBIGIOUS_CLASS_LABEL = 4
# Column of the manual-label table that holds the class id of each point.
LABEL_COLUMN = 'Counter'

def label_masks(masks, labels: pd.DataFrame):
    """Assign a class label to every mask from manually placed label points.

    Every mask is visited once and gets two keys written in place:
    ``'label'`` and ``'ambigious'``. A mask hit by exactly one label point
    takes that point's class; a mask hit by no point or by several points
    gets ``AMBIGIOUS_CLASS_LABEL`` (and ``'ambigious'`` is True only in the
    several-points case).

    A point hits a mask when it lies inside the mask's bounding box
    (inclusive on both ends) and the mask's ``'segmentation'`` array is truthy
    at the point's position relative to the box origin.

    Args:
        masks: Sequence of dicts with ``'bbox'`` (x, y, width, height) and
            ``'segmentation'`` (2-D array indexed ``[row, column]`` relative
            to the bbox origin).
        labels: Table with ``'X'``, ``'Y'`` and ``LABEL_COLUMN`` columns.

    Returns:
        Tuple of three sets: positional indices of label points that hit at
        least one mask, indices of masks hit by at least one point, and
        positional indices of label points that hit more than one mask.
    """
    found_labels = set()
    found_masks = set()
    multi_mask_labels = set()

    for mask_idx, mask in enumerate(masks):
        box = mask['bbox']
        pixels = mask['segmentation']

        # Start from the "no unique label" state; overwritten on a single hit.
        mask['ambigious'] = False
        mask['label'] = AMBIGIOUS_CLASS_LABEL

        col_min = int(box[0])
        col_max = int(box[0] + box[2])
        row_min = int(box[1])
        row_max = int(box[1] + box[3])

        hits = 0
        points = zip(labels['X'], labels['Y'], labels[LABEL_COLUMN])
        for point_idx, (px, py, point_label) in enumerate(points):
            col = int(px)
            row = int(py)
            inside_box = col_min <= col <= col_max and row_min <= row <= row_max
            if not (inside_box and pixels[row - row_min, col - col_min]):
                continue

            hits += 1
            # The same point was already matched to an earlier mask.
            if point_idx in found_labels:
                multi_mask_labels.add(point_idx)
            mask['label'] = point_label
            found_labels.add(point_idx)
            found_masks.add(mask_idx)

        # Several points inside one mask: no single class can be assigned.
        if hits > 1:
            mask['ambigious'] = True
            mask['label'] = AMBIGIOUS_CLASS_LABEL

    return found_labels, found_masks, multi_mask_labels

def label_masks_v2(masks, labels: pd.DataFrame):
    """Assign class labels to masks by walking over the manual label points.

    For every label point, each mask whose bounding box (inclusive) and
    segmentation contain the point is considered a hit:

    * the first hit mask that has no ``'label'`` key yet takes the point's
      class;
    * a hit mask that already has a ``'label'`` key, or any further mask hit
      by the same point, is set to ``AMBIGIOUS_CLASS_LABEL``.

    Masks that are never hit get no ``'label'`` key at all. Masks are
    modified in place. A mask that already carries a ``'label'`` key when the
    function is entered is treated as already labelled.

    A summary (number of labelled masks and a per-class count) is printed.

    Args:
        masks: Sequence of dicts with ``'bbox'`` (x, y, width, height) and
            ``'segmentation'`` (2-D array indexed ``[row, column]`` relative
            to the bbox origin).
        labels: Table with ``'X'``, ``'Y'`` and ``LABEL_COLUMN`` columns.

    Returns:
        Tuple of three sets: positional indices of label points that hit at
        least one mask, indices of masks hit by at least one point, and
        positional indices of label points that hit more than one mask.
    """
    label_is_found = set()
    mask_is_found = set()
    label_is_found_in_multiple_masks = set()

    # Bounding-box limits do not depend on the label point; compute them once.
    mask_bounds = []
    for mask in masks:
        bbox = mask['bbox']
        mask_bounds.append((
            int(bbox[0]),
            int(bbox[0] + bbox[2]),
            int(bbox[1]),
            int(bbox[1] + bbox[3]),
        ))

    points = zip(labels['X'], labels['Y'], labels[LABEL_COLUMN])
    for label_index, (x, y, label) in enumerate(points):
        x = int(x)
        y = int(y)
        label_found = False
        hit_count = 0
        for mask_index, (mask, bounds) in enumerate(zip(masks, mask_bounds)):
            x_0, x_1, y_0, y_1 = bounds
            if not (x_0 <= x <= x_1 and y_0 <= y <= y_1):
                continue
            if not mask['segmentation'][y - y_0, x - x_0]:
                continue

            hit_count += 1
            label_is_found.add(label_index)
            mask_is_found.add(mask_index)

            if 'label' in mask or label_found:
                # Mask already claimed, or this point already claimed a mask.
                mask['label'] = AMBIGIOUS_CLASS_LABEL
            else:
                mask['label'] = label
                label_found = True

        if hit_count > 1:
            label_is_found_in_multiple_masks.add(label_index)

    found = 0
    count = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0}
    for mask in masks:
        if 'label' in mask:
            found += 1
            # .get keeps the summary working for class ids outside 0..4.
            count[mask['label']] = count.get(mask['label'], 0) + 1

    print('found {} masks'.format(found))
    print(count)

    return label_is_found, mask_is_found, label_is_found_in_multiple_masks

In [None]:
def get_masks_img(masks, image):
    """Render the coloured masks into an RGBA overlay the size of ``image``.

    The overlay starts as white with alpha 0. Masks are painted from the
    largest ``'area'`` to the smallest, so smaller masks end up on top. Masks
    without a ``'color'`` key are skipped. Each mask's ``'segmentation'`` is
    applied to the window ``[y : y + h + 1, x : x + w + 1]`` of the overlay.

    Args:
        masks: Iterable of dicts with ``'area'``, ``'bbox'`` (x, y, w, h),
            ``'segmentation'`` and optionally ``'color'`` (RGBA).
        image: Array whose first two dimensions give the overlay size.

    Returns:
        Float array of shape ``(image.shape[0], image.shape[1], 4)``.
    """
    height, width = image.shape[0], image.shape[1]
    overlay = np.ones((height, width, 4))
    overlay[:, :, 3] = 0

    for mask in sorted(masks, key=lambda m: m['area'], reverse=True):
        left, top, box_w, box_h = mask['bbox']
        left, top, box_w, box_h = int(left), int(top), int(box_w), int(box_h)
        pixels = mask['segmentation']
        if 'color' in mask:
            window = overlay[top : top + box_h + 1, left : left + box_w + 1]
            # `window` is a view, so the assignment paints into `overlay`.
            window[pixels] = mask['color']

    return overlay

In [None]:
# Offsets of the eight neighbours of a pixel (first and second array index).
_dx = [-1, -1, -1, 0, 0, 1, 1, 1]
_dy = [-1, 0, 1, -1, 1, -1, 0, 1]
def is_countour(segmentation, x, y):
    """Tell whether the pixel ``segmentation[x][y]`` lies on the mask contour.

    A pixel is a contour pixel when it is set and either sits on the border
    of the array or has at least one unset pixel among its eight neighbours.

    Args:
        segmentation: 2-D array of truthy/falsy values.
        x: Index along the first array dimension.
        y: Index along the second array dimension.

    Returns:
        True for a contour pixel, False otherwise.
    """
    if not segmentation[x][y]:
        return False

    last_x = segmentation.shape[0] - 1
    last_y = segmentation.shape[1] - 1
    # Border pixels have neighbours outside the array, so they count as contour.
    if x == 0 or y == 0 or x == last_x or y == last_y:
        return True

    return any(
        not segmentation[x + step_x][y + step_y]
        for step_x, step_y in zip(_dx, _dy)
    )

def find_diameter(mask, coeff):
    """Return the largest distance between two contour pixels, times ``coeff``.

    A contour pixel is a set pixel that lies on the border of the
    segmentation array or has at least one unset pixel among its eight
    neighbours (the same rule ``is_countour`` applies to a single pixel).
    The contour is extracted and compared with vectorised numpy operations
    instead of per-pixel Python calls; the returned value is unchanged.

    Args:
        mask: Dict whose ``'segmentation'`` entry is a 2-D array of
            truthy/falsy values.
        coeff: Scale factor applied to the distance measured in pixels.

    Returns:
        ``sqrt(max squared pixel distance) * coeff``; ``0.0 * coeff`` when
        the mask has no set pixel or only one contour pixel.
    """
    segmentation = np.asarray(mask['segmentation']).astype(bool)
    rows, cols = segmentation.shape

    # Pad with False so that pixels on the array border see "unset" neighbours
    # and are therefore classified as contour pixels.
    padded = np.pad(segmentation, 1, mode='constant', constant_values=False)
    all_neighbours_set = np.ones((rows, cols), dtype=bool)
    for d_row in (-1, 0, 1):
        for d_col in (-1, 0, 1):
            if d_row == 0 and d_col == 0:
                continue
            all_neighbours_set &= padded[
                1 + d_row : 1 + d_row + rows,
                1 + d_col : 1 + d_col + cols,
            ]

    contour = np.argwhere(segmentation & ~all_neighbours_set)

    # One vectorised pass per contour point keeps memory linear in the
    # contour length while still comparing every pair of points.
    max_squared = 0
    for point in contour:
        delta = contour - point
        max_squared = max(max_squared, int((delta * delta).sum(axis=1).max()))

    return max_squared ** 0.5 * coeff

In [None]:
# Overlay colour (RGBA) per class label. The overlay built by get_masks_img is
# a float array, so every component is given on the 0-1 scale; matplotlib
# clips float RGB(A) values above 1, which made the former 255 entries
# equivalent to 1.0 but triggered a clipping warning.
# Labels without an entry here (e.g. the ambiguous class) are not drawn.
LABEL_COLORS = {
    0: [1.0, 0.0, 0.0, 0.35],
    1: [0.0, 1.0, 0.0, 0.35],
    2: [0.0, 0.0, 1.0, 0.35],
    3: [0.9, 0.9, 0.9, 0.35],
}

# Labelled masks of every image, in IMAGE_FILEPATHS order.
all_masks = []
for image_filepath in IMAGE_FILEPATHS:
    labels = read_manual_labels(image_filepath)
    print(labels.head())
    print(labels.shape)

    image = read_image(image_filepath)
    masks = read_masks_for_image(image_filepath)

    label_is_found, mask_is_found, label_is_found_in_multiple_masks = label_masks_v2(masks, labels)

    for mask in masks:
        if 'label' not in mask:
            continue
        color = LABEL_COLORS.get(mask['label'])
        if color is not None:
            # Copy so that masks never share one mutable colour list.
            mask['color'] = list(color)

    masks_image = get_masks_img(masks, image)

    all_masks.append(masks)

    # Show the image with the coloured mask overlay on top.
    plt.figure(figsize=(20,20))
    plt.imshow(image)
    ax = plt.gca()
    ax.set_autoscale_on(False)
    ax.imshow(masks_image)
    plt.axis('off')
    plt.show()

In [None]:
# Per image: diameters of the labelled masks grouped by class label.
# NOTE(review): coeff=50/1212 presumably converts pixels to physical units
# (e.g. a 50-unit scale bar spanning 1212 px) - TODO confirm.
all_diameters = []
for masks in all_masks:
    diameters = {class_id: [] for class_id in range(5)}
    for mask in tqdm(masks):
        if 'label' in mask:
            diameter = find_diameter(mask, coeff=50/1212)
            # Masks with a diameter of 3 or less are left out and get no
            # 'diameter' key.
            if diameter > 3:
                mask['diameter'] = diameter
                diameters[mask['label']].append(diameter)

    all_diameters.append(diameters)

In [None]:
# Diameter distribution of one image: index 3 is the fourth entry of
# IMAGE_FILEPATHS, because all_diameters follows the image processing order.
diameters = all_diameters[3]
_labels = [k for k in [0, 1, 2] for _ in diameters[k]]
_diameters = [d for k in [0, 1, 2] for d in diameters[k]]

df = pd.DataFrame({"label":_labels, "diameter":_diameters})

# A dict palette pins each class to its colour. With a plain list the colours
# are handed out in the order of the classes that are present, so a class
# without any mask would shift the colours of the remaining ones.
sns.histplot(data=df, x="diameter", hue="label", bins=15, kde=True, stat='density', palette={0: 'red', 1: 'green', 2: 'blue'})

for k in [0, 1, 2]:
    class_diameters = df.loc[df['label'] == k, 'diameter']
    print('label', k)
    print('mean', class_diameters.mean())
    print('std', class_diameters.std())
    print()

In [None]:
# One figure per image with the diameter histograms of classes 0, 1 and 2
# drawn on the same axes.
for diameters in all_diameters:
    plt.subplots(figsize=(7,6), dpi=600)
    for class_id, legend_name, colour in ((0, "0", "red"), (1, "1", "green"), (2, "2", "blue")):
        sns.histplot(diameters[class_id], color=colour, label=legend_name, kde=True)

    plt.legend()

In [None]:
# Directory that receives one diameters_<image index>.csv per image.
# NOTE(review): this is an absolute path on one particular machine; point it
# at a project-relative directory before running the notebook elsewhere.
DIAMETERS_OUTPUT_DIR = os.path.normpath('C://Users/lapin/Documents/for_alina/rbc')

for image_index, diameters in enumerate(all_diameters):
    # Long format: one row per mask, classes 0, 1 and 2 only.
    data = {
        'label': [k for k in [0, 1, 2] for _ in diameters[k]],
        'diameter': [d for k in [0, 1, 2] for d in diameters[k]],
    }

    df = pd.DataFrame(data)
    df.to_csv(os.path.join(DIAMETERS_OUTPUT_DIR, 'diameters_{}.csv'.format(image_index)), index=False)

In [None]:
# Show every class 0/1/2 mask whose diameter exceeds 5.7.
# NOTE: `masks` and `image` are whatever the earlier loops left behind, i.e.
# the masks and pixels of the last processed image when cells run in order.
for mask in masks:
    if 'label' not in mask: continue
    if mask['label'] not in [0, 1, 2]: continue
    if 'diameter' not in mask: continue
    if mask['diameter'] > 5.7:
        print(mask['label'])
        print(mask['diameter'])
        coords = mask['bbox']
        print(coords)
        segmentation = mask['segmentation']
        # Cast to int like the other bbox consumers in this notebook do;
        # float bbox values cannot be used as slice bounds.
        x_0, y_0 = int(coords[0]), int(coords[1])
        box_w, box_h = int(coords[2]), int(coords[3])
        roi = image[y_0 : y_0 + box_h + 1, x_0 : x_0 + box_w + 1]
        # Zero out every pixel of the crop that is not part of the mask.
        masked_roi = roi * segmentation[:, :, np.newaxis]

        plt.figure(figsize=(1,1))
        plt.imshow(masked_roi)
        plt.axis('off')
        plt.show()