In [4]:
import geojson
import geopandas as gpd
import json
import numpy as np
import pandas as pd
import ast
import os
import skimage

In [None]:
processed_annotations = pd.read_csv("processed_annotations/batch2/865247_processed_annotations.csv")
processed_annotations

Unnamed: 0,detection_x,detection_y,contour,Classification
0,39283.5,4882.5,"[[[39282, 4868], [39282, 4868], [39278, 4868],...",Inflammatory
1,41779.0,12141.0,"[[[41781, 12135], [41780, 12136], [41780, 1213...",Inflammatory
2,41763.5,12474.0,"[[[41764, 12470], [41764, 12471], [41764, 1247...",Inflammatory
3,41733.5,12912.0,"[[[41734, 12906], [41734, 12906], [41732, 1290...",Inflammatory
4,41727.5,12938.5,"[[[41726, 12936], [41725, 12936], [41725, 1293...",Inflammatory
...,...,...,...,...
34765,12282.5,27647.0,"[[[12282, 27645], [12282, 27646], [12281, 2764...",Epithelial
34766,12018.5,27658.0,"[[[12016, 27654], [12015, 27656], [12015, 2765...",Epithelial
34767,12281.5,27682.5,"[[[12280, 27678], [12280, 27679], [12279, 2767...",Epithelial
34768,12191.0,27786.5,"[[[12192, 27783], [12191, 27784], [12190, 2778...",Epithelial


In [5]:
# Masking functions to prepare the image masks in the HoverNet format

def create_polygon_mask_instance(shape, points, instance):
    """ 
    Creates a mask for given set of polygon points, according to its instance number
    """
    return skimage.draw.polygon2mask(shape, points) * instance

def create_polygon_mask_class(shape, points, classification):
    """
    Creates a mask for a given set of polygon points, according to the cell classification.
    """
    label = 0
    if classification == 'Neoplastic':
        label = 1
    if classification == 'Connective':
        label = 2
    if classification == 'Inflammatory':
        label = 3
    if classification == 'Dead':
        label = 4
    if classification == 'Epithelial':
        label = 5
    return skimage.draw.polygon2mask(shape, points) * label, label

In [6]:
example_annotation_file = "annotations/batch2/865247_good15_TS2_TEI6.geojson"
with open(example_annotation_file, "r") as f:
    annotation_data = geojson.load(f)

In [7]:
def extract_tiles(annotation_data):
    """Function to read in tiles and extract coordinates for resized 256 x 256 tiles"""

    tiles = {}
    columns = ['tile_index', 'start_x', 'end_x', 'start_y', 'end_y']

    tile_counter = 0
    for i in range(len(annotation_data['features'])):
        current_tile = annotation_data['features'][i]
        start_coords = np.min(np.array(current_tile['geometry']['coordinates'][0]), axis = 0)
        end_coords = np.max(np.array(current_tile['geometry']['coordinates'][0]), axis = 0)
        for j in range(start_coords[0], end_coords[0], 256):
            for k in range(start_coords[1], end_coords[1], 256):
                tile_counter += 1
                tiles[tile_counter] = [tile_counter, j, j+256, k, k+256]
    
    tiles_df = pd.DataFrame.from_dict(tiles, orient='index', columns=columns)
    return tiles_df

In [8]:
tiles_df = extract_tiles(annotation_data)
tiles_df

Unnamed: 0,tile_index,start_x,end_x,start_y,end_y
1,1,9200,9456,16531,16787
2,2,9200,9456,16787,17043
3,3,9200,9456,17043,17299
4,4,9200,9456,17299,17555
5,5,9456,9712,16531,16787
...,...,...,...,...,...
364,364,5961,6217,14328,14584
365,365,6217,6473,13560,13816
366,366,6217,6473,13816,14072
367,367,6217,6473,14072,14328


In [9]:
def set_tile_index(row, tiles_df):
    """Function to find the correct tile per nucleus"""

    detection_x = row['detection_x']
    detection_y = row['detection_y']
    for i in range(len(tiles_df)):
        current_tile = tiles_df.iloc[i]
        if (detection_x >= current_tile['start_x']) and (detection_x <= current_tile['end_x']) and (detection_y >= current_tile['start_y']) and (detection_y <= current_tile['end_y']):
            return current_tile['tile_index']
    
    return np.nan

In [10]:
processed_annotations['tile_index'] = processed_annotations.apply(lambda row: set_tile_index(row, tiles_df), axis=1)
processed_annotations = processed_annotations.dropna(subset=['tile_index'])
processed_annotations['tile_index'] = processed_annotations['tile_index'].astype(int)
processed_annotations

Unnamed: 0,detection_x,detection_y,contour,Classification,tile_index
0,39283.5,4882.5,"[[[39282, 4868], [39282, 4868], [39278, 4868],...",Inflammatory,254
1,41779.0,12141.0,"[[[41781, 12135], [41780, 12136], [41780, 1213...",Inflammatory,201
2,41763.5,12474.0,"[[[41764, 12470], [41764, 12471], [41764, 1247...",Inflammatory,202
3,41733.5,12912.0,"[[[41734, 12906], [41734, 12906], [41732, 1290...",Inflammatory,204
4,41727.5,12938.5,"[[[41726, 12936], [41725, 12936], [41725, 1293...",Inflammatory,204
...,...,...,...,...,...
34765,12282.5,27647.0,"[[[12282, 27645], [12282, 27646], [12281, 2764...",Epithelial,139
34766,12018.5,27658.0,"[[[12016, 27654], [12015, 27656], [12015, 2765...",Epithelial,136
34767,12281.5,27682.5,"[[[12280, 27678], [12280, 27679], [12279, 2767...",Epithelial,140
34768,12191.0,27786.5,"[[[12192, 27783], [12191, 27784], [12190, 2778...",Epithelial,136


In [12]:
for i in range(len(tiles_df)):
    current_tile = tiles_df.iloc[i]
    tile_index = current_tile['tile_index']
    tile_annotations = processed_annotations[processed_annotations['tile_index'] == tile_index]
    
    if len(tile_annotations) == 0:
        continue
    
    instance_map = np.zeros((256, 256), dtype=np.uint16)
    type_map = np.zeros((256, 256), dtype=np.uint8)
    
    for j in range(len(tile_annotations)):
        annotation = tile_annotations.iloc[j]
        polygon_points = np.array(ast.literal_eval(annotation['contour'])[0])
        adjusted_points = polygon_points - np.array([current_tile['start_x'], current_tile['start_y']])
        
        poly_instance_mask = create_polygon_mask_instance((256, 256), np.rint(adjusted_points), j + 1)
        poly_class_mask, class_label = create_polygon_mask_class((256, 256), np.rint(adjusted_points), annotation['Classification'])
        
        instance_map = np.maximum(instance_map, poly_instance_mask)
        type_map = np.maximum(type_map, poly_class_mask)
    
    instance_map = instance_map.T
    type_map = type_map.T

    formatted_annotations = {'inst_map': instance_map, 'type_map': type_map}

    with open(f"labels/batch2/865247_tile_{tile_index:04}.npy", "wb") as f:
        np.save(f, formatted_annotations)

In [13]:
test_data = np.load('labels/batch2/865247_tile_0001.npy', allow_pickle = True)
test_data = test_data.tolist()
test_data

{'inst_map': array([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]], shape=(256, 256)),
 'type_map': array([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]], shape=(256, 256))}