In [None]:
#CHANGE THIS TO LOCAL VALUES
#root folder of the competition data; both paths below are built from it
_hpa_data_root="../input/hpa-single-cell-image-classification/"
#path of train.csv
csv_path=_hpa_data_root+"train.csv"
#folder path of the images in the dataset (keep the trailing slash, file names are appended directly to it)
img_folder_path=_hpa_data_root+"train/"

In [None]:
#OPTIONAL!
#the dataset can be processed in several smaller runs instead of all at once

#False = use the entire dataset, True = only use one partition of it in this run
USE_PARTITIONS = False
#number of parts the dataset is split up into
PARTS = 10
#0-based index of the partition that is used in this run (only read if USE_PARTITIONS is True)
CURRENT_PARTITION = 0

In [None]:
#DOES NOT HAVE TO BE CHANGED BUT CAN BE CHANGED
#folder that holds the weights of the two segmentation models
_model_weights_dir = "../input/hpacellsegmentatormodelweights/"
#model paths
#if these files do not exist yet, the program will automatically download them
NUC_MODEL = _model_weights_dir + "dpn_unet_nuclei_v1.pth"
CELL_MODEL = _model_weights_dir + "dpn_unet_cell_3ch_v1.pth"

In [None]:
#installs and imports
!pip install https://github.com/CellProfiling/HPA-Cell-Segmentation/archive/master.zip
!pip install bbox-visualizer
import pandas as pd
import numpy as np
import hpacellseg.cellsegmentator as cellsegmentator
from hpacellseg.utils import label_cell, label_nuclei
from PIL import Image, ImageDraw
from tqdm import tqdm
import os.path
import matplotlib.pyplot as plt
import csv
import bbox_visualizer as bbv

In [None]:
#make a new directory for the masks
#(exist_ok=True makes this a no-op when the folder is already there, so no separate existence check is needed)
dirName="masks"
os.makedirs(dirName, exist_ok=True)
#read id/label csv into a DataFrame
id_labels_array=pd.read_csv(csv_path)
#fix labels (convert the "|"-separated strings to lists of ints)
id_labels_array["Label"]=id_labels_array["Label"].apply(lambda raw: [int(part) for part in raw.split("|")])
#create list of all ids from the id_labels_array
id_array=id_labels_array["ID"].tolist()
#create dictionary that maps every image id to its list of labels
#(built directly from the two columns instead of transposing the whole frame and unwrapping the result)
labels=dict(zip(id_labels_array["ID"], id_labels_array["Label"]))

In [None]:
def partition_data(ids,n_parts):
    """
    Split the image ids into consecutive parts.

    input:
        ids=list of image ids
        n_parts=integer value of desired number of parts
    output:
        partition=dictionary that maps each part index (starting at 0) to the
                  numpy array of ids belonging to that part
    """
    return dict(enumerate(np.array_split(ids, n_parts)))

In [None]:
#CHOOSE THE PARTITION TO BE USED
#always start from the complete id list of the csv, so that running this cell
#again does not partition an already partitioned id_array a second time
all_ids=id_labels_array["ID"].tolist()
if USE_PARTITIONS:
    #fail early with a readable message instead of a KeyError on the partition dictionary
    if not 0 <= CURRENT_PARTITION < PARTS:
        raise ValueError("CURRENT_PARTITION must be between 0 and PARTS-1, got {} with PARTS={}".format(CURRENT_PARTITION, PARTS))
    id_array=partition_data(all_ids,PARTS)[CURRENT_PARTITION]
else:
    id_array=all_ids

In [None]:
#keyword settings handed to the segmentator, kept together so they are easy to tune
segmentator_settings = dict(
    scale_factor=0.25,
    device="cuda", #"cuda" for gpu, "cpu" for cpu
    padding=False,
    multi_channel_model=True,
)
#the two model paths are passed positionally: nuclei model first, cell model second
segmentator = cellsegmentator.CellSegmentator(NUC_MODEL, CELL_MODEL, **segmentator_settings)

In [None]:
#iterates through image ids and creates numpy arrays of the masks
#ids whose mask file already exists are skipped, so an interrupted run can be continued
for img_id in tqdm(id_array):
    maskpath="masks/mask_"+img_id+".npy"
    if os.path.isfile(maskpath):
        continue
    path=img_folder_path+img_id
    #read every channel exactly once; the with-blocks close the image files
    #as soon as the pixel data has been copied into an array
    with Image.open(path+"_red.png") as ch_r:
        img_r = np.array(ch_r)
    with Image.open(path+"_yellow.png") as ch_y:
        img_y = np.array(ch_y)
    with Image.open(path+"_blue.png") as ch_b:
        img_b = np.array(ch_b)
    nuc_segmentations = segmentator.pred_nuclei([img_b])
    cell_segmentations = segmentator.pred_cells([[img_r], [img_y], [img_b]])
    #only the cell mask is stored, the nuclei mask is not needed
    _, cell_mask = label_cell(nuc_segmentations[0], cell_segmentations[0])
    cell_mask = np.asarray(cell_mask)
    #uint8 keeps the files small but can only hold label ids up to 255;
    #larger ids would silently wrap around, so fall back to uint16 for such masks
    if cell_mask.max() <= np.iinfo(np.uint8).max:
        cell_mask = cell_mask.astype(np.uint8)
    else:
        cell_mask = cell_mask.astype(np.uint16)
    #save under a temporary name first and rename afterwards: an interrupted save
    #then never leaves a partial file at maskpath that the check above would skip
    tmp_maskpath="masks/mask_"+img_id+".tmp.npy"
    np.save(tmp_maskpath,cell_mask)
    os.replace(tmp_maskpath,maskpath)

In [None]:
#visualize one of the masks
img_id="5c27f04c-bb99-11e8-b2b9-ac1f6b6435d0" #random img id of img that has been converted
mask_path="./masks/mask_"+img_id+".npy"
#load the red, green and blue channel image of that id as arrays
img_r,img_g,img_b=[
    np.asarray(Image.open(img_folder_path+img_id+suffix))
    for suffix in ("_red.png","_green.png","_blue.png")
]

#first figure: the stored mask
saved_mask=np.load(mask_path)
plt.imshow(saved_mask)
plt.show()
#second figure: the three channels stacked into one colour image
stacked_channels=np.dstack((img_r,img_g,img_b))
plt.imshow(stacked_channels)
plt.show()

In [None]:
def bbox(img,cell_id):
    """
    Find the axis-aligned bounding box of one labelled cell in a mask.

    input:
        img=2D array of label ids (the mask)
        cell_id=label id of the cell to locate
    output:
        xmin, ymin, xmax, ymax=inclusive pixel coordinates of the box
            (x = column index, y = row index), returned as plain Python ints
    raises:
        ValueError if no pixel of img equals cell_id
    """
    hits = np.where(np.asarray(img) == cell_id)
    rows, cols = hits[0], hits[1]
    if rows.size == 0:
        #np.min/np.max on an empty selection would fail with an unhelpful message
        raise ValueError("cell id {!r} does not occur in the mask".format(cell_id))
    #convert to Python ints so the values print as bare numbers when a list of
    #boxes is turned into text (NumPy scalars may print as e.g. np.int64(5))
    return int(cols.min()), int(rows.min()), int(cols.max()), int(rows.max())

In [None]:
#write one csv row per image: the image id and the list of [xmin, ymin, xmax, ymax] boxes of its cells
#NOTE: the file is opened in append mode and no header row is written,
#so running this cell again adds the same rows a second time
with open(r'bboxes.csv', 'a', newline='') as csvfile:
    fieldnames = ['img_id','bboxes']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    for img_id in tqdm(id_array):
        cell_mask = np.load("./masks/mask_"+img_id+'.npy')
        #np.unique returns the distinct label ids in sorted order without
        #building a Python set from every single pixel
        cell_ids = np.unique(cell_mask)
        bbox_dims = []
        for cell_id in cell_ids:
            #label 0 is skipped (presumably the background); a mask without any 0 is fine too
            if cell_id == 0:
                continue
            #store plain ints: the list is written as text, and NumPy scalars
            #may be rendered as e.g. np.int64(5) instead of 5
            bbox_dims.append([int(value) for value in bbox(cell_mask,cell_id)])
        writer.writerow({'img_id':img_id, 'bboxes':bbox_dims})

In [None]:
#visualize bbox results
img_id="5c27f04c-bb99-11e8-b2b9-ac1f6b6435d0" #random img id of img that has been converted
img_r=np.asarray(Image.open(img_folder_path+img_id+"_red.png"))
img_g=np.asarray(Image.open(img_folder_path+img_id+"_green.png"))
img_b=np.asarray(Image.open(img_folder_path+img_id+"_blue.png"))
#stack the three channels into one colour image to draw on
img=np.dstack((img_r,img_g,img_b))
cell_mask = np.load("./masks/mask_"+img_id+'.npy')
#distinct label ids of the mask in sorted order; label 0 is left out
#(presumably the background), and a mask without any 0 does not raise
cell_ids=[cell_id for cell_id in np.unique(cell_mask) if cell_id != 0]
#one [xmin, ymin, xmax, ymax] box per cell, as plain Python ints rather than NumPy scalars
bboxes=[[int(value) for value in bbox(cell_mask,cell_id)] for cell_id in cell_ids]
plt.imshow(bbv.draw_multiple_rectangles(img, bboxes))
plt.show()