In [1]:
import torch

# Pick the compute device: CUDA when PyTorch reports it as available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Using device:', device)

# When running on CUDA, report each visible GPU's name, total memory and
# current memory usage (bytes divided by 1024**3, rounded to one decimal).
if device.type == 'cuda':
    bytes_per_gb = 1024**3
    for gpu_index in range(torch.cuda.device_count()):
        print()
        print('Device ',gpu_index)
        gpu_name = torch.cuda.get_device_name(gpu_index)
        total_gb = round(torch.cuda.get_device_properties(gpu_index).total_memory/bytes_per_gb,1)
        print(gpu_name, total_gb, 'GB')
        print('Memory Usage:')
        allocated_gb = round(torch.cuda.memory_allocated(gpu_index)/bytes_per_gb,1)
        print('Allocated:  ', allocated_gb, 'GB')
        reserved_gb = round(torch.cuda.memory_reserved(gpu_index)/bytes_per_gb,1)
        print('Reserved:   ', reserved_gb, 'GB')

Using device: cpu


In [2]:
import importlib
import time

import hpacellseg.cellsegmentator as cellsegmentator
import pandas

from src import utils

In [4]:
# Model checkpoint files handed to the segmentator (nuclei model first, cell model second).
NUC_MODEL = "./segmentation/nuclei-model.pth"
CELL_MODEL = "./segmentation/cell-model.pth"

# Keyword options are gathered first so the constructor call stays short.
# The torch device is passed on as its string form.
segmentator_options = dict(
    scale_factor=0.25,
    device=str(device),
    padding=True,
    multi_channel_model=True,
)
segmentator = cellsegmentator.CellSegmentator(NUC_MODEL, CELL_MODEL, **segmentator_options)

In [None]:
def create_cell_segmentation_rles(csv_file, root_dir, segmentator,test=False,n_samples=None,n_workers=None):
    """Segment the images under ``root_dir`` and return one row per cell mask.

    Images are discovered by globbing ``root_dir`` for ``*_red.png``; the
    ``*_yellow.png`` and ``*_blue.png`` paths are derived from each red path.
    Every (red, yellow, blue) triple is passed to ``segment_image`` through
    ``Parallel``/``delayed`` (presumably joblib -- defined outside this block)
    and the per-image lists are flattened into a single frame.

    Args:
        csv_file: Path of the CSV loaded with ``pandas.read_csv`` and handed
            to ``segment_image`` as ``images_frame``.
        root_dir: Directory that is globbed for ``*_red.png`` files.
        segmentator: Passed through unchanged to ``segment_image``.
        test: Passed through unchanged to ``segment_image``.
        n_samples: If truthy, only the first ``n_samples`` globbed images are
            processed (fewer when the directory holds fewer images).
        n_workers: Number of parallel jobs; when falsy the module-level
            ``num_cores`` (defined outside this block) is used.

    Returns:
        pandas.DataFrame built from the cell dicts returned by
        ``segment_image`` (empty when no image matched).
    """
    red_suffix = '_red.png'
    if not n_workers: n_workers=num_cores
    images_frame = pandas.read_csv(csv_file)
    # Build image paths.
    print(f"Globbing {root_dir}.")
    fileiterator = glob.iglob(root_dir + '/' + '*_red.png')
    if n_samples:
        # zip() stops at the shorter input, so a directory with fewer than
        # n_samples images is handled instead of raising StopIteration.
        mt = [f for _, f in zip(range(n_samples), fileiterator)]
        print(f"Processing first {len(mt)} images.")
    else:
        mt = list(fileiterator)
        print(f"Processing {len(mt)} images.")
    # Swap only the trailing channel suffix: a plain str.replace('red', ...)
    # would also rewrite "red" appearing in a directory name.
    stems = [f[:-len(red_suffix)] for f in mt]
    er = [stem + '_yellow.png' for stem in stems]
    nu = [stem + '_blue.png' for stem in stems]
    # Materialise the triples so the progress bar knows the total.
    images = list(zip(mt, er, nu))
    processed_list = Parallel(n_jobs=int(n_workers))(
        delayed(segment_image)(i, segmentator, images_frame, test) for i in tqdm(images)
    )
    print("Finished.")
    # Flatten the per-image lists of cell dicts into one list of rows.
    cells = [cell for image_cells in processed_list for cell in image_cells]
    cells_frame = pandas.DataFrame(cells)
    return cells_frame

In [None]:
def segment_image(image, segmentator, images_frame, test):
    """Segment one image and return a list of per-cell records.

    Args:
        image: ``(mt, er, nu)`` tuple of image file paths. The image ID is
            derived from the file name of ``mt`` (extension and ``_red``
            removed).
        segmentator: Object providing ``pred_nuclei`` and ``pred_cells``.
        images_frame: Frame with ``ID`` and ``Label`` columns; only read when
            ``test`` is false.
        test: When true no label is looked up and ``Label`` is ``None``.

    Returns:
        list of dict, one per cell, with the keys ``ID``, ``ImageHeight``,
        ``ImageWidth``, ``Label``, ``touches_edge_btlr`` and ``RLEmask``.

    Raises:
        IndexError: If ``test`` is false and ``images_frame`` has no row for
            the image ID.
    """
    mt, er, nu = image
    # Run segmentation model. (Takes list of lists...)
    nuc_segmentations = segmentator.pred_nuclei([nu])
    cell_segmentations = segmentator.pred_cells([[mt],[er],[nu]])
    # Image ID = file name of the first channel without extension and "_red".
    image_id = os.path.basename(mt).split('.')[0].replace("_red","")
    if test:
        label = None
    else:
        labels = images_frame.Label[images_frame.ID == image_id].values
        if len(labels) == 0:
            # Same exception type as the bare ``.values[0]`` lookup, but with
            # a message that names the offending image.
            raise IndexError(f"No label found for image ID {image_id!r}.")
        label = labels[0]
    # Get cell masks and rle encode.
    _nuclei_mask, cell_mask = label_cell(nuc_segmentations[0], cell_segmentations[0])
    id_shape = cell_mask.shape
    cell_ids = numpy.unique(cell_mask)
    # Drop background explicitly rather than dropping the smallest label, so
    # an image without any background pixel does not lose a real cell.
    # NOTE(review): assumes label_cell marks background as 0 -- confirm.
    cell_ids = cell_ids[cell_ids != 0]
    # Prepare output. Masks are built one at a time to avoid holding a
    # full-size boolean array per cell in memory simultaneously.
    cells = []
    for cell_id in cell_ids:
        mask = cell_mask == cell_id
        cells.append({
            'ID': image_id,
            'ImageHeight': id_shape[0],
            'ImageWidth': id_shape[1],
            'Label': label,
            'touches_edge_btlr': touches_edge(mask),
            'RLEmask': encode_binary_mask(mask),
        })
    return cells

In [None]:
def encode_binary_mask(mask):
    """Converts a binary mask into OID challenge encoding ascii text.

    The mask is RLE-encoded with ``coco_mask.encode``, the RLE counts are
    zlib-compressed at the best compression level and the result is
    base64-encoded.

    Args:
        mask: NumPy boolean array that is 2-D after ``numpy.squeeze``.

    Returns:
        bytes: The base64 encoding of the compressed RLE counts (a ``bytes``
        object, as returned by ``base64.b64encode``).

    Raises:
        ValueError: If ``mask`` is not of boolean dtype, or is not 2-D after
            squeezing.
    """
    # check input mask --
    # ``numpy.bool_`` is used because the bare ``numpy.bool`` alias is not
    # available in every NumPy release.
    if mask.dtype != numpy.bool_:
        raise ValueError(
            "encode_binary_mask expects a binary mask, received dtype == %s" %
            mask.dtype)
    mask = numpy.squeeze(mask)
    if len(mask.shape) != 2:
        # The shape is wrapped in a 1-tuple: "%s" % (a, b, c) would itself
        # fail with TypeError and hide the intended ValueError.
        raise ValueError(
            "encode_binary_mask expects a 2d mask, received shape == %s" %
            (mask.shape,))
    # convert input mask to expected COCO API input --
    mask_to_encode = mask.reshape(mask.shape[0], mask.shape[1], 1)
    mask_to_encode = mask_to_encode.astype(numpy.uint8)
    mask_to_encode = numpy.asfortranarray(mask_to_encode)
    # RLE encode mask --
    encoded_mask = coco_mask.encode(mask_to_encode)[0]["counts"]
    # compress and base64 encoding --
    binary_str = zlib.compress(encoded_mask, zlib.Z_BEST_COMPRESSION)
    base64_str = base64.b64encode(binary_str)
    return base64_str

#### Create segmentation masks for the test and train sets and save them as CSV.

In [None]:
# Reload the utils module so recent edits are picked up without a kernel restart.
importlib.reload(utils)
# The result is bound to ``test_cells`` (mirroring ``train_cells`` in the
# train-set cell) so the preview on the last line refers to a defined name.
test_cells = utils.create_cell_segmentation_rles(
    csv_file="data/sample_submission.csv",
    root_dir="data/test/",
    segmentator=segmentator,
    test=True,
    n_samples=24,
    n_workers=24
)
# Timestamped file name so earlier exports are not overwritten.
timestr = time.strftime("%Y-%m-%d-%H%M%S")
test_cells.to_csv(f"data/test_cells_{timestr}.csv", index=False)
test_cells.head()

Globbing data/test/.
Processing first 24 images.


In [None]:
# Reload the utils module so recent edits are picked up before segmenting.
importlib.reload(utils)
train_kwargs = {
    "csv_file": "data/train.csv",
    "root_dir": "data/train/",
    "segmentator": segmentator,
}
train_cells = utils.create_cell_segmentation_rles(**train_kwargs)
# The timestamp is taken once segmentation has returned, so it marks completion.
timestr = time.strftime("%Y-%m-%d-%H%M%S")
train_cells.to_csv(f"data/train_cells_{timestr}.csv", index=False)
train_cells.head()

Globbing data/train/.


### Check DataFrames and save without index labels

In [12]:
test = pandas.read_csv("data/test_cells.csv")
# Drop the stray "Unnamed: 0" column (presumably an index saved by an earlier
# export). ``columns=`` replaces the positional axis argument, which current
# pandas no longer accepts; ``errors="ignore"`` keeps the cell re-runnable on
# files that were already written without the index.
test2 = test.drop(columns="Unnamed: 0", errors="ignore")
test2.head()

Unnamed: 0,ID,ImageHeight,ImageWidth,Label,touches_edge_btlr,RLEmask
0,72ca5902-b93c-4174-b335-bf193b534956,2048,2048,,"[True, False, True, False]",b'eNozyDIOyzE0wAv8DUHQwMDQ39gXXcbbFCaLCvyMIKKE...
1,72ca5902-b93c-4174-b335-bf193b534956,2048,2048,,"[True, False, False, False]",b'eNrNUcsKwkAM/KXMZn2B4EWw6m6yoogHEVs8lB7s/98k...
2,72ca5902-b93c-4174-b335-bf193b534956,2048,2048,,"[True, False, False, False]",b'eNpllOtyo0gMhV+p1eDNJJPKbjIzm2wwBxt8wVz6Athg...
3,72ca5902-b93c-4174-b335-bf193b534956,2048,2048,,"[True, False, False, False]",b'eNoLDUhKNTDMyTc08zb1NTAw9DfyM0ahQSwIDYHIbAgf...
4,72ca5902-b93c-4174-b335-bf193b534956,2048,2048,,"[True, False, False, False]",b'eNrdUlFPwyAQ/kv9CmuMJpolmiw4jkK6mrrsxW7GNsD+...


In [22]:
train = pandas.read_csv("data/train_cells.csv")
# Drop the stray "Unnamed: 0" column (presumably an index saved by an earlier
# export). ``columns=`` replaces the positional axis argument, which current
# pandas no longer accepts; ``errors="ignore"`` keeps the cell re-runnable on
# files that were already written without the index.
train2 = train.drop(columns="Unnamed: 0", errors="ignore")
print(len(train2))
train2.head()

491196


Unnamed: 0,ID,ImageHeight,ImageWidth,Label,touches_edge_btlr,RLEmask
0,fc655b1a-bbc3-11e8-b2bc-ac1f6b6435d0,2048,2048,16|5,"[False, False, False, False]",b'eNqNU8FuwyAM/SUbJ9l6mrZT0oK5VKq0qpetmqZe9v+3...
1,fc655b1a-bbc3-11e8-b2bc-ac1f6b6435d0,2048,2048,16|5,"[False, False, False, False]",b'eNqVVGlz2jAQ/UtaX53Q5phOzEzBXuVDfGKDjG1mbIj5...
2,fc655b1a-bbc3-11e8-b2bc-ac1f6b6435d0,2048,2048,16|5,"[False, False, False, False]",b'eNqVVNtOwzAM/SU7SbcJoT1MCJU2dSQGTNpFSGPA2B74...
3,fc655b1a-bbc3-11e8-b2bc-ac1f6b6435d0,2048,2048,16|5,"[False, False, False, False]",b'eNp1VNt2qjAQ/aWZiNDVLm3rWVpbZaIWBQWCgMpFLv//...
4,fc655b1a-bbc3-11e8-b2bc-ac1f6b6435d0,2048,2048,16|5,"[False, False, False, False]",b'eNqNVOt22jAMfiXJTgJll55th2ti+VAoUCgbhbaslK7v...


In [23]:
# Write both frames to CSV without the index column.
for frame, csv_path in ((train2, "data/train_cells.csv"), (test2, "data/test_cells.csv")):
    frame.to_csv(csv_path, index=False)

### Create a 1/10th Validation set from train data.

In [25]:
# Total number of rows in train2.
train2.shape[0]

491196