In this notebook I implement a U-Net model with fewer than 90,000 parameters. The model uses inverted residual blocks as its main processing blocks and employs pyramid scene parsing in the 'horizontal' (skip) connections of the U-Net. The model is pretrained on the LIVECell dataset for 3 epochs before being trained on the competition data. The model performs well at picking cells out of the image, but performs poorly on the discriminatory masking required for the Sartorius competition. To put it simply, it does well at finding all cells in an image but struggles to pick out a specific type of cell. 

More finely labeled images could help solve this problem. Multiple notebooks in this competition use Detectron2 to label all cell types present in the samples, but at that point one might as well use the Detectron2 model itself to mask the data.

While the model is far from top-scoring, the fact that it scores at all is a win for me. It does well as a simple cell masker. It was fun to create such a small model! 

In [None]:
import os
import cv2
import numpy as np
import pandas as pd
from glob import glob
from scipy.io import loadmat
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
import tensorflow_io as tfio

# Model Generation

In [None]:
###
### Defining the blocks which make up the model
###

def inverted_residual_block(x, expand=64, squeeze=16):
    """
    Expand -> depthwise -> squeeze convolution stack with a skip connection.

    Parameters
    ----------
    x: tensor
        Input feature map. It is added to the block output at the end, so it
        has to be shape-compatible with the squeezed features.
    expand: int
        Number of filters of the first 1x1 convolution.
    squeeze: int
        Number of filters of the last 1x1 convolution.

    Returns
    -------
    The element-wise sum of the processed features and `x`.
    """
    def _norm_relu(features):
        # every convolution in this block is followed by batch norm + ReLU
        features = keras.layers.BatchNormalization()(features)
        return keras.layers.Activation('relu')(features)

    # 1x1 convolution up to `expand` channels
    expanded = _norm_relu(keras.layers.Conv2D(expand, 1, padding='same')(x))

    # 3x3 depthwise convolution
    filtered = _norm_relu(keras.layers.DepthwiseConv2D(3, padding='same')(expanded))

    # 1x1 convolution back to `squeeze` channels
    squeezed = _norm_relu(keras.layers.Conv2D(squeeze, 1, padding='same')(filtered))

    return keras.layers.add([squeezed, x])

def down_sampling_block(x,filters):
    """
    Halve the spatial resolution of `x` using two summed branches.

    One branch is a 3x3 max pool with stride 2; the other is a strided 1x1
    convolution with `filters` output channels followed by batch norm and
    ReLU. Both branches read `x` directly and their outputs are added.
    """
    pooled = keras.layers.MaxPooling2D(3, strides=2, padding='same')(x)

    # projected residual branch
    projected = x
    for layer in (keras.layers.Conv2D(filters, 1, strides=2, padding='same'),
                  keras.layers.BatchNormalization(),
                  keras.layers.Activation('relu')):
        projected = layer(projected)

    return keras.layers.add([pooled, projected])

def up_sampling_block(x, filters):
    """
    Double the spatial resolution of `x` using two summed branches.

    One branch is a plain 2x up-sampling of `x`; the other up-samples `x`
    and then applies a 1x1 convolution with `filters` output channels, batch
    norm and ReLU. The two branches are added.
    """
    upsampled = keras.layers.UpSampling2D(2)(x)

    # projected residual branch
    projected = keras.layers.UpSampling2D(2)(x)
    for layer in (keras.layers.Conv2D(filters, 1, padding='same'),
                  keras.layers.BatchNormalization(),
                  keras.layers.Activation('relu')):
        projected = layer(projected)

    return keras.layers.add([upsampled, projected])


def pyramid_parsing(x, depth=0, img_size=(520, 704),
                    filters=8, f_ratio=8, pyramid=(2, 3, 6, 9)):
    """
    Concatenate max-pooled context maps at several bin sizes onto `x`.

    For every entry of `pyramid` the input is max pooled with a window of
    roughly (height / bin, width / bin), up-sampled and resized back to the
    size of `x`, reduced to `filters // f_ratio` channels (at least one) by a
    1x1 convolution + batch norm + ReLU, and concatenated onto the running
    output.

    Parameters
    ----------
    x: tensor
        Feature map to add context to.
    depth: int
        How many stride-2 down-sampling steps precede `x`. Only used when the
        static spatial size of `x` is unknown.
    img_size: tuple
        (height, width) of the network input. Only used when the static
        spatial size of `x` is unknown.
    filters: int
        Reference filter count for the context branches.
    f_ratio: int
        Divisor applied to `filters` to get the channels of each branch.
    pyramid: sequence of int
        Number of bins per axis for each pyramid level.

    Returns
    -------
    `x` concatenated with one context map per pyramid level.
    """
    # Prefer the real spatial size of `x`. If it is not statically known,
    # derive it from `img_size`, rounding up at every halving because the
    # stride-2 layers in this file use padding='same'.
    height, width = x.get_shape()[1:3]
    if height is None or width is None:
        height, width = img_size
        for _ in range(depth):
            height, width = -(-height // 2), -(-width // 2)
    height, width = int(height), int(width)

    # never ask for a zero-channel convolution
    branch_filters = max(1, filters // f_ratio)

    y = x
    for bin_val in pyramid:
        # a pooling window must be at least one pixel wide
        pool_size = (max(1, height // bin_val), max(1, width // bin_val))
        m = keras.layers.MaxPool2D(pool_size=pool_size, padding='same')(x)

        # the 'same' padding can make the pooled map larger than bin_val
        m_height, m_width = m.get_shape()[1:3]
        m = keras.layers.UpSampling2D(size=(height // m_height, width // m_width))(m)

        # Integer up-sampling can leave the map a few pixels short; a
        # nearest-neighbour resize extends the edge bins instead of zero padding.
        m = keras.layers.Resizing(height, width, interpolation='nearest')(m)

        # scale down the amount of filters
        m = keras.layers.Conv2D(branch_filters, 1, padding='same')(m)
        m = keras.layers.BatchNormalization()(m)
        m = keras.layers.Activation('relu')(m)
        y = keras.layers.Concatenate()([y, m])

    return y

In [None]:
def get_model(img_size, channels, num_classes, filter_list=(8, 16, 32, 64), PSP=False):
    """
    Creates the U-net model.

    Parameters
    ----------
    img_size: tuple
        The spatial dimensions of the input samples as (height, width).
    channels: int
        The channels of the input samples.
    num_classes: int
        The amount of desired output channels.
    filter_list: sequence of int
        The filters to construct the model with. The length of the sequence
        defines the depth of the model, with the final value being the
        'bottom' layer.
    PSP: bool
        Defines if the model should use Pyramid Scene Parsing in its
        horizontal connections.

    Returns
    -------
    tf.keras Functional API model, samples have shape of
    (img_size[0], img_size[1], num_classes).
    """
    def _pointwise(features, n_filters):
        # 1x1 convolution + batch norm + ReLU to set the channel count
        features = keras.layers.Conv2D(n_filters, 1, padding='same')(features)
        features = keras.layers.BatchNormalization()(features)
        return keras.layers.Activation('relu')(features)

    filter_list = list(filter_list)
    bottom_filter = filter_list[-1]
    stage_filters = filter_list[:-1]

    # tuple() so that a list-valued img_size is accepted as well
    inputs = keras.Input(shape=tuple(img_size) + (channels,))
    x = inputs

    ### Downsampling layers ###

    # outputs kept for the horizontal connections of the U-net
    horizontal_connections = []

    # `depth` counts the halvings applied so far; pyramid parsing uses it
    for depth, filters in enumerate(stage_filters):
        x = _pointwise(x, filters)

        x = inverted_residual_block(x, expand=filters*2, squeeze=filters)
        x = inverted_residual_block(x, expand=filters*2, squeeze=filters)

        if PSP:
            context_stack = pyramid_parsing(x, depth=depth, img_size=img_size,
                                            filters=filters, f_ratio=filters)
            horizontal_connections.append(context_stack)
        else:
            horizontal_connections.append(x)

        x = down_sampling_block(x, filters=filters)

    ### Bottom layer ###
    # no horizontal connection
    x = _pointwise(x, bottom_filter)

    x = inverted_residual_block(x, expand=bottom_filter*2, squeeze=bottom_filter)
    x = inverted_residual_block(x, expand=bottom_filter*2, squeeze=bottom_filter)

    ### Upsampling layers ###

    # up_sampling_block adds its projection to the plain up-sampled input, so
    # the projection needs exactly the channel count `x` currently has. Track
    # it explicitly rather than assuming each stage doubles the previous one.
    current_filters = bottom_filter
    for filters, h_con in zip(stage_filters[::-1], horizontal_connections[::-1]):
        # upsample, add the horizontal components
        x = up_sampling_block(x, filters=current_filters)
        x = keras.layers.Concatenate()([x, h_con])

        # shrink filters to desired size
        x = _pointwise(x, filters)
        current_filters = filters

        x = inverted_residual_block(x, expand=filters*2, squeeze=filters)
        x = inverted_residual_block(x, expand=filters*2, squeeze=filters)

    # Add a per-pixel classification layer
    x = keras.layers.Conv2D(num_classes, 3, padding='same')(x)
    x = keras.layers.BatchNormalization()(x)
    outputs = keras.layers.Activation('sigmoid')(x)

    model = keras.Model(inputs, outputs)
    return model

In [None]:
# (height, width) of the images fed to the network
image_size = (520, 704)
HEIGHT, WIDTH = image_size

# Build the pyramid-parsing variant for single-channel input and a
# single-channel output, then load the weights from training.
model = get_model(image_size, channels=1, num_classes=1, PSP=True)
model.load_weights('/kaggle/input/cell-model-weights/spyramid_8/spyramid_8_epoch')

model.summary()

# A Look at the mask performance

In [None]:
# Three test images used to eyeball the predicted masks.
sample_files = [
    '../input/sartorius-cell-instance-segmentation/test/7ae19de7bc2a.png',
    '../input/sartorius-cell-instance-segmentation/test/d48ec7815252.png',
    '../input/sartorius-cell-instance-segmentation/test/d8bfd1dafdc4.png',
]

for file_path in sample_files:
    # left panel: network input, right panel: prediction
    fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(16, 16), sharey=True)

    # add a leading batch axis and a trailing channel axis for the model
    input_test = plt.imread(str(file_path)).reshape(1, 520, 704, 1)
    # zero center the data
    input_test = input_test - input_test.mean()

    pred = model.predict(input_test)

    ax1.imshow(input_test[0])
    ax2.imshow(pred[0])

As mentioned, the model can pick out the cells present in the sample. If one simply wants to select all cells in an image, this is not a bad masker.

# Creating Mask output

In [None]:
import skimage.morphology

############ Submission
def rle_encode(img):
    '''
    Run-length encode a binary mask.

    img: numpy array.
        1 - mask, 0 - background

    Returns the encoding as a space separated string of alternating
    1-based start positions and run lengths, taken over the flattened array.
    '''
    # a zero on each end makes runs touching either border produce a change
    padded = np.concatenate([[0], img.flatten(), [0]])

    # 1-based positions where the value differs from the previous pixel;
    # for a binary mask these alternate between run start and one-past-run-end
    edges = np.where(padded[1:] != padded[:-1])[0] + 1

    # turn every second entry from an end position into a run length
    edges[1::2] -= edges[::2]

    return ' '.join(map(str, edges))

def get_regions(x, THRESHOLD, min_size=25):
    """
    Converts predictions to rle encoded masks.

    The prediction is thresholded, split into connected regions, and every
    region that is large enough is run-length encoded on its own.

    Parameters
    --------------
    x: numpy array
        Predictions from our model.
    THRESHOLD: float
        The threshold value above which we consider a pixel a cell.
    min_size: int
        The minimum area, in pixels, of a region to be included in our final
        output.

    Returns
    --------------
    res: list
        list of masks encoded into rle encodings.
    """
    res = []
    regions = skimage.morphology.label(x > THRESHOLD)  # select out separate regions.
    for i in range(1, regions.max() + 1):
        mask = regions == i
        # Filter on the pixel count of the region. The character length of
        # the rle string depends on how many digits the positions have, so it
        # is not a measure of area.
        if mask.sum() < min_size:
            continue
        res.append(rle_encode(mask))
    return res

In [None]:
# Grab all test files; sorted so the submission rows come out in the same
# order on every run, independent of the directory listing order.
test_files = sorted(glob('../input/sartorius-cell-instance-segmentation/test/*'))

sample_submission = pd.read_csv('../input/sartorius-cell-instance-segmentation/sample_submission.csv')

rows = []  # one (image id, rle mask) pair per predicted region

### Loop over all test files ###
for file_path in test_files:
    # image id = file name without directory and extension
    tag = os.path.splitext(os.path.basename(file_path))[0]
    print(tag)

    # extra axis in the front (batch) and back (channel) for easy input into the model
    input_test = plt.imread(str(file_path)).reshape(1, HEIGHT, WIDTH, 1)
    input_test = input_test - np.mean(input_test)  # zero center the data

    pred = model.predict(input_test)
    regions = get_regions(pred[0], 0.5, min_size=25)

    # NOTE(review): an image with no surviving region contributes no row;
    # confirm that the competition accepts a submission without it.
    rows.extend((tag, mask) for mask in regions)

# Build the frame once instead of growing it one row at a time; the columns
# follow the sample submission.
output_df = pd.DataFrame(rows, columns=sample_submission.columns)
count = len(output_df)  # number of masks written

In [None]:
# Display the assembled submission table in the notebook.
output_df

In [None]:
# Write the submission file to the Kaggle working directory, without the index column.
output_df.to_csv('/kaggle/working/submission.csv', index = False)
