In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import warnings
warnings.filterwarnings("ignore")
import cv2
from tensorflow import keras
import tensorflow as tf
import keras
from keras import backend as K
from keras.models import Model
from keras.layers import Input
from keras.layers.convolutional import Conv2D, Conv2DTranspose
from keras.layers.pooling import MaxPooling2D
from keras.layers.merge import concatenate
from keras.losses import binary_crossentropy
from keras.callbacks import Callback, ModelCheckpoint
from keras.models import load_model
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.utils import image_dataset_from_directory
import PIL,gc,os,sys
import imageio

In [None]:
# Load the competition's training annotation table from the Kaggle input directory.
train_df = pd.read_csv('../input/sartorius-cell-instance-segmentation/train.csv')
print(train_df.shape)
# Last expression of the cell: the notebook renders the first four rows as cell output.
train_df.head(4)

In [None]:
# Build two name-sorted lists of full file paths: one for the image tiles, one for the mask PNGs.
# NOTE(review): later cells pair image_list[i] with mask_list[i] purely by position; this assumes
# both folders sort into the same order and have the same number of files - confirm.
path = ''
image_path = os.path.join(path, '../input/sartoriuscellinstancesegmentationmaskpng/TrainImage2x2/')
mask_path = os.path.join(path, '../input/sartoriuscellinstancesegmentationmaskpng/TrainMask2x2/')
image_list = [image_path + file_name for file_name in sorted(os.listdir(image_path))]
mask_list = [mask_path + file_name for file_name in sorted(os.listdir(mask_path))]

In [None]:
def rle_decode(mask_rle, shape, color=1):
    '''
    Decode a run-length string into a 2-D mask.

    mask_rle: whitespace-separated "start length start length ..." string; starts are 1-based
              positions in the flattened (row-major) image.
    shape: (height, width) of the array to return.
    color: value written into the decoded runs (default 1).
    Returns a float32 numpy array of the given shape; pixels outside every run are 0.
    ref: https://www.kaggle.com/inversion/run-length-decoding-quick-start
    '''
    tokens = mask_rle.split()
    # Even positions hold the 1-based starts, odd positions the run lengths.
    run_starts = np.asarray(tokens[0::2], dtype=int) - 1
    run_lengths = np.asarray(tokens[1::2], dtype=int)
    run_ends = run_starts + run_lengths

    flat = np.zeros(shape[0] * shape[1], dtype=np.float32)
    for begin, end in zip(run_starts, run_ends):
        flat[begin:end] = color
    return flat.reshape(shape)


def rle_encode(img):
    '''
    Encode a binary mask as a run-length string.

    img: numpy array, 1 - mask, 0 - background.
    Returns a space-separated "start length start length ..." string, where each start is the
    1-based position of a run in the flattened array. An all-background mask gives ''.
    ref: https://www.kaggle.com/dragonzhang/positive-score-with-detectron-3-3-inference
    '''
    # Zero sentinels on both ends so runs touching the borders still produce a start and an end.
    padded = np.concatenate([[0], img.flatten(), [0]])
    # 1-based positions where the value changes: alternating run starts and run ends.
    changes = np.where(padded[1:] != padded[:-1])[0] + 1
    # Turn every "end" entry into a length by subtracting the preceding start.
    changes[1::2] -= changes[::2]
    return ' '.join(str(value) for value in changes)

def make_predictions(dataset, num, keras_model):
    '''
    Predict a binary mask for the first `num` images of a dataset.

    dataset: object exposing `.take(num)` that yields single, un-batched images
             (e.g. a tf.data.Dataset); a falsy value yields an empty result.
    num: number of images to take; it is forwarded unchanged to `dataset.take`.
    keras_model: object exposing `.predict(batch)`; the prediction is indexed as
                 (batch, height, width, channel) and only channel 0 is used.
    Returns a list of 2-D ndarrays holding 0/1 values (probability > 0.5 -> 1).

    The model emits one semantic channel, so there are no per-instance channels that could
    overlap. The previous overlap "fix" treated the image width as the instance axis and
    replaced the prediction with a one-hot of the per-row argmax, so it is not applied.
    '''
    predictions = []
    if not dataset:
        return predictions

    for image in dataset.take(num):
        # The model expects a leading batch axis.
        batch = image[None]
        pred_mask = keras_model.predict(batch)
        # Drop the batch and channel axes: (1, H, W, C) -> (H, W).
        pred_mask = pred_mask[0, :, :, 0]
        # Threshold the probabilities into 0s and 1s.
        predictions.append(np.where(pred_mask > 0.5, 1, 0))
    return predictions

def get_mask(image_id, df):
    '''
    Build one combined binary mask for an image from its run-length annotations.

    image_id: value matched against the "id" column of df.
    df: dataframe with "id" and "annotation" columns.
    Returns a (HEIGHT, WIDTH) array in which every annotated pixel is 1 and the rest 0.
    Relies on the module-level HEIGHT / WIDTH constants and on rle_decode().
    ref: https://www.kaggle.com/barteksadlej123/sartors-tf-starter
    '''
    annotations = df.loc[df["id"] == image_id, "annotation"].tolist()

    combined = np.zeros((HEIGHT, WIDTH))
    for annotation in annotations:
        combined += rle_decode(annotation, (HEIGHT, WIDTH))

    # Pixels covered by several annotations would exceed 1; cap them.
    return combined.clip(0, 1)


#  fix overlaps: 

def check_overlap(msk):
    '''
    Check whether any pixel is claimed by more than one channel of a mask.

    msk: array-like whose LAST axis indexes instances/channels; any non-zero value counts
         as "set".
    Returns True when at least one position has more than one channel set, else False.
    ref: https://www.kaggle.com/awsaf49/sartorius-fix-overlap
    '''
    # `np.bool` was removed in NumPy 1.24; the builtin `bool` is the supported spelling.
    binary = np.asarray(msk).astype(bool).astype(np.uint8)
    return bool(np.any(np.sum(binary, axis=-1) > 1))


def fix_overlap(msk):
    '''
    Resolve overlaps in a multi-channel instance mask so each pixel belongs to at most one channel.

    Args:
        mask: multi-channel mask, each channel is an instance of cell, shape:(520,704,None)
              (the pad specification below has three entries, so the input must be 3-D)
    Returns:
        multi-channel mask with non-overlapping values, shape:(520,704,None);
        channels that end up with no pixels are dropped
    ref: https://www.kaggle.com/awsaf49/sartorius-fix-overlap
    '''
    msk = np.array(msk)
    # Prepend one all-zero channel on the last axis; it acts as the "background" class.
    msk = np.pad(msk, [[0,0],[0,0],[1,0]])
    ins_len = msk.shape[-1]
    # Keep, per pixel, the index of the strongest channel (np.argmax returns the first on ties,
    # so all-zero pixels map to the padded background channel 0).
    msk = np.argmax(msk,axis=-1)
    # One-hot encode the winning index back into channels.
    msk = tf.keras.utils.to_categorical(msk, num_classes=ins_len)
    # Drop the padded background channel again.
    msk = msk[...,1:]
    # Keep only the channels that still contain at least one pixel.
    msk = msk[...,np.any(msk, axis=(0,1))]
    return msk


In [None]:
# Sanity check: number of image paths collected from the image folder.
len(image_list)

In [None]:
# Preview the first four image paths.
image_list[0:4]

In [None]:
# Preview the first four mask paths (compare by eye with the image paths above).
mask_list[0:4]

### Check out some of the unmasked and masked images from the dataset:

In [None]:
# Show one image tile next to its mask.
# N is a hard-coded sample index; indexing fails if fewer than N+1 files were listed.
N = 1000
img = cv2.imread(image_list[N])
# Masks are read as a single-channel (grayscale) array.
mask = cv2.imread(mask_list[N],cv2.IMREAD_GRAYSCALE)
print(mask.shape)
#mask = np.array([max(mask[i, j]) for i in range(mask.shape[0]) for j in range(mask.shape[1])]).reshape(img.shape[0], img.shape[1])

# Side-by-side figure: image on the left, mask on the right.
fig, arr = plt.subplots(1, 2, figsize=(14, 10))
arr[0].imshow(img)
arr[0].set_title('Image')
arr[1].imshow(mask)
arr[1].set_title('Segmentation')

In [None]:
def rle_decode(mask_rle, shape, color=1):
    '''
    Decode a run-length string into a mask, with or without a channel axis.

    mask_rle: run-length as string formatted "start length start length ..."; starts are
              1-based positions in the flattened (row-major) image.
    shape: (height, width) or (height, width, channels) of the array to return.
    color: value written into the decoded pixels; a scalar, or one value per channel
           when shape has a channel axis.
    Returns a float32 numpy array of the given shape, `color` - mask, 0 - background.

    This definition replaces the earlier 2-D-only rle_decode in this notebook, so it must
    keep accepting 2-D shapes (get_mask() calls it with (HEIGHT, WIDTH)); indexing
    shape[2] unconditionally raised IndexError for those calls.
    '''
    s = mask_rle.split()
    starts = np.asarray(s[0::2], dtype=int) - 1
    lengths = np.asarray(s[1::2], dtype=int)
    ends = starts + lengths

    # One row per pixel; trailing dimensions (if any) hold the channels.
    channel_dims = tuple(shape[2:])
    img = np.zeros((shape[0] * shape[1],) + channel_dims, dtype=np.float32)
    for lo, hi in zip(starts, ends):
        img[lo:hi] = color
    return img.reshape(shape)


def build_masks(labels, input_shape, colors=True):
    '''
    Combine several run-length annotations into one mask image.

    labels: iterable of run-length strings, one per annotated instance.
    input_shape: (height, width) of the target image.
    colors: when truthy, each instance is painted with a random RGB colour and the result
            has 3 channels; otherwise instances are painted with 1 into a single channel.
    Returns a (height, width, 3) or (height, width, 1) array with values clipped to [0, 1].
    '''
    height, width = input_shape
    n_channels = 3 if colors else 1
    canvas = np.zeros((height, width, n_channels))

    for label in labels:
        if colors:
            # A fresh random colour per instance so neighbouring cells can be told apart.
            canvas += rle_decode(label, shape=(height, width, 3), color=np.random.rand(3))
        else:
            canvas += rle_decode(label, shape=(height, width, 1))

    # Overlapping instances add up; cap the sum at 1.
    return canvas.clip(0, 1)

def rle2maskResize(rle):
    '''
    Decode a run-length string for a 520x704 image and return every second row and column.

    rle: "start length start length ..." string with 1-based starts.
    Returns a uint8 array: (260, 352) for a non-empty string, and an all-zero (256, 256)
    array for an empty string.
    NOTE(review): the two return shapes differ, and the decode uses column-major (order='F')
    layout while rle_decode() in this notebook is row-major - confirm which is intended.
    '''
    if len(rle) == 0:
        return np.zeros((256, 256), dtype=np.uint8)

    height, width = 520, 704
    flat = np.zeros(width * height, dtype=np.uint8)

    values = np.asarray([int(token) for token in rle.split()])
    starts = values[0::2] - 1
    lengths = values[1::2]
    for idx in range(len(starts)):
        begin = int(starts[idx])
        end = int(starts[idx] + lengths[idx])
        flat[begin:end] = 1

    # Column-major reshape, then keep every second row and column (2x downsample).
    return flat.reshape((height, width), order='F')[::2, ::2]

def make_predictions(dataset, num, keras_model):
    '''
    Predict a binary mask for the first `num` images of a dataset.

    dataset: object exposing `.take(num)` that yields single, un-batched images
             (e.g. a tf.data.Dataset); a falsy value yields an empty result.
    num: number of images to take; it is forwarded unchanged to `dataset.take`.
    keras_model: object exposing `.predict(batch)`; the prediction is indexed as
                 (batch, height, width, channel) and only channel 0 is used.
    Returns a list of 2-D ndarrays holding 0/1 values (probability > 0.5 -> 1).

    The model emits one semantic channel, so there are no per-instance channels that could
    overlap. The previous overlap "fix" treated the image width as the instance axis and
    replaced the prediction with a one-hot of the per-row argmax, so it is not applied.
    '''
    predictions = []
    if not dataset:
        return predictions

    for image in dataset.take(num):
        # The model expects a leading batch axis.
        batch = image[None]
        pred_mask = keras_model.predict(batch)
        # Drop the batch and channel axes: (1, H, W, C) -> (H, W).
        pred_mask = pred_mask[0, :, :, 0]
        # Threshold the probabilities into 0s and 1s.
        predictions.append(np.where(pred_mask > 0.5, 1, 0))
    return predictions

### 2.1 - Split Your Dataset into Unmasked and Masked Images

In [None]:
# File-path datasets for images and masks; shuffle=False keeps the listing order deterministic.
image_list_ds = tf.data.Dataset.list_files(image_list, shuffle=False)
mask_list_ds = tf.data.Dataset.list_files(mask_list, shuffle=False)

# Print the first three (image path, mask path) pairs to check the pairing by eye.
# NOTE(review): this loop rebinds the module-level name `path` assigned in an earlier cell.
for path in zip(image_list_ds.take(3), mask_list_ds.take(3)):
    print(path)

In [None]:
# Same pairing as above, built as one dataset of (image path, mask path) string tensors.
image_filenames = tf.constant(image_list)
masks_filenames = tf.constant(mask_list)

dataset = tf.data.Dataset.from_tensor_slices((image_filenames, masks_filenames))

# Print the first five path pairs.
# NOTE(review): the loop variables rebind `mask`, which an earlier cell used for a loaded mask array.
for image, mask in dataset.take(5):
    print(image)
    print(mask)

In addition, the image color values are normalized to the [0,1] range.

In [None]:
def normalize(input_image, input_mask):
    """Scale an image to float32 values divided by 255 and shift the mask labels down by one.

    Returns the pair (scaled image, shifted mask).
    NOTE(review): the `-= 1` shift only makes sense for masks whose labels start at 1;
    confirm against the mask PNGs used in this notebook.
    """
    scaled_image = tf.cast(input_image, tf.float32) / 255.0
    input_mask -= 1
    return scaled_image, input_mask

In [None]:
def load_image(input_image1, input_mask1):
    """Resize an image and its mask to 512x512, then pass both through normalize().

    Returns the (image, mask) pair produced by normalize().
    """
    target_size = (512, 512)
    resized_image = tf.image.resize(input_image1, target_size)
    resized_mask = tf.image.resize(input_mask1, target_size)
    return normalize(resized_image, resized_mask)

In [None]:
# constants

DEBUG = False

# Seed shared by the train/validation split and the augmentation layers.
SEED = 123
# Used only by get_mask() as the shape of the decoded mask.
# NOTE(review): rle2maskResize() returns (260, 352) arrays, i.e. height 260 / width 352,
# which suggests these two values may be swapped - confirm.
WIDTH, HEIGHT = 260, 352
# Size every image and mask is resized to before being fed to the model.
RESIZE_WIDTH, RESIZE_HEIGHT = 256, 256
BATCH_SIZE = 32
# Shuffle buffer size of the training pipeline.
BUFFER_SIZE = 32
# Fraction of the samples held out for validation.
VAL_SPLIT = 0.2

AUTO = tf.data.AUTOTUNE

# NOTE(review): the training cell passes epochs=10 literally and does not read this constant.
EPOCHS = 20

In [None]:
from sklearn.model_selection import train_test_split
# Split the path lists together so every image keeps its mask at the same index;
# random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(image_list, mask_list, test_size=VAL_SPLIT, random_state=SEED)
# Print the first few image/mask paths of the training split to check the pairing.
print(X_train[0:4])
print(y_train[0:4])

In [None]:
# File-path datasets for each split, in listing order (shuffle=False).
# NOTE(review): none of these four datasets is referenced again in the visible notebook.
X_train_ds = tf.data.Dataset.list_files(X_train, shuffle=False)
X_test_ds = tf.data.Dataset.list_files(X_test, shuffle=False)
y_train_ds = tf.data.Dataset.list_files(y_train, shuffle=False)
y_test_ds = tf.data.Dataset.list_files(y_test, shuffle=False)

In [None]:
# The split path lists as constant string tensors.
# NOTE(review): these tensors are not referenced again in the visible notebook.
X_train_filenames = tf.constant(X_train)
X_test_filenames = tf.constant(X_test)
y_train_filenames = tf.constant(y_train)
y_test_filenames = tf.constant(y_test)
print(X_train_filenames)

In [None]:
###### tempimg = imageio.imread(X_train[1])
# Load one training image/mask pair, print the shapes before and after resizing, and plot both.
tempmask = cv2.imread(y_train[1],cv2.IMREAD_GRAYSCALE)
tempimg = cv2.imread(X_train[1])
print(tempmask.shape)
print(tempimg.shape)
# NOTE(review): RESIZE_WIDTH is passed for both dimensions; harmless while
# RESIZE_WIDTH == RESIZE_HEIGHT, but presumably (RESIZE_WIDTH, RESIZE_HEIGHT) was meant.
tempmask = cv2.resize(tempmask,(RESIZE_WIDTH,RESIZE_WIDTH))
tempimg = cv2.resize(tempimg,(RESIZE_WIDTH,RESIZE_WIDTH))
print(tempmask.shape)
print(tempimg.shape)
fig, arr = plt.subplots(1, 2, figsize=(14, 10))
arr[0].imshow(tempimg)
arr[0].set_title('Image')
arr[1].imshow(tempmask)
arr[1].set_title('Segmentation')

In [None]:
def train_generator(X,y):
    """Yield one (image, mask) training pair per sample.

    X: sequence of image file paths.
    y: sequence of mask file paths, aligned with X by position.
    Yields:
        image: float32 array of shape (RESIZE_HEIGHT, RESIZE_WIDTH, 3), RGB channel order
               (the same conversion test_generator applies at inference time).
        mask:  int32 array of shape (RESIZE_HEIGHT, RESIZE_WIDTH, 1).
    Raises:
        ValueError: if X and y have different lengths.
        FileNotFoundError: if OpenCV cannot read an image or mask file.

    The previous version ran two independent loops and had a single `yield` after both,
    so it produced exactly one pair (the last image with the last mask) per pass.
    """
    if len(X) != len(y):
        raise ValueError(
            "X and y must have the same length, got %d and %d" % (len(X), len(y)))

    for image_file, mask_file in zip(X, y):
        image = cv2.imread(image_file)
        if image is None:
            raise FileNotFoundError("could not read image: %s" % image_file)
        mask = cv2.imread(mask_file, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            raise FileNotFoundError("could not read mask: %s" % mask_file)

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # cv2.resize takes the target size as (width, height).
        image = cv2.resize(image, (RESIZE_WIDTH, RESIZE_HEIGHT))
        image = image.astype(np.float32)

        # Nearest-neighbour keeps the mask's label values; interpolating would blend
        # foreground and background into intermediate values along the edges.
        mask = cv2.resize(mask, (RESIZE_WIDTH, RESIZE_HEIGHT),
                          interpolation=cv2.INTER_NEAREST)
        # NOTE(review): mask values are passed through unchanged; the loss treats them as
        # binary targets, so the PNGs are presumably 0/1 - confirm.
        mask = mask.reshape((*mask.shape, 1))
        mask = mask.astype(np.int32)

        yield image, mask

In [None]:
# use the generator to get training and validation sets
# Each dataset re-invokes train_generator through the lambda every time it is iterated.
# Declared element spec: float32 image (RESIZE_HEIGHT, RESIZE_WIDTH, 3) and
# int32 mask (RESIZE_HEIGHT, RESIZE_WIDTH, 1).
train_ds = tf.data.Dataset.from_generator(
    lambda : train_generator(X_train,y_train), 
    output_types=(tf.float32, tf.int32),
    output_shapes=((RESIZE_HEIGHT, RESIZE_WIDTH,3), (RESIZE_HEIGHT, RESIZE_WIDTH,1)))

valid_ds = tf.data.Dataset.from_generator(
    lambda : train_generator(X_test,y_test), 
    output_types=(tf.float32, tf.int32),
    output_shapes=((RESIZE_HEIGHT, RESIZE_WIDTH,3), (RESIZE_HEIGHT, RESIZE_WIDTH,1)))

The following class performs a simple augmentation by randomly flipping an image horizontally and applying the same flip to its mask. See the TensorFlow image augmentation tutorial to learn more.

In [None]:
class Augment(tf.keras.layers.Layer):
    """Layer that applies a random horizontal flip to an (inputs, labels) pair."""

    def __init__(self, seed=SEED):
        super().__init__()
        # both use the same seed, so they'll make the same random changes.
        self.augment_inputs = tf.keras.layers.RandomFlip(mode="horizontal", seed=seed)
        self.augment_labels = tf.keras.layers.RandomFlip(mode="horizontal", seed=seed)

    def call(self, inputs, labels):
        """Flip `inputs` and `labels` with their respective layers and return both."""
        inputs = self.augment_inputs(inputs)
        labels = self.augment_labels(labels)
        return inputs, labels

In [None]:
# "build the input pipeline, applying the augmentation after batching the inputs"

# Training pipeline: shuffle within a BUFFER_SIZE window, batch, repeat indefinitely,
# flip each batch with Augment, and prefetch.
# NOTE(review): the cell rebinds train_ds / valid_ds, so running it twice stacks the
# transformations (batches of batches).
train_ds = (
    train_ds
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE)
    .repeat()
    .map(Augment())
    .prefetch(AUTO))

# Validation pipeline: same batching, but no shuffling and no augmentation.
valid_ds = (
    valid_ds
    .batch(BATCH_SIZE)
    .repeat()
    .prefetch(AUTO))

In [None]:
# Displays the repr of a two-batch sub-dataset (element spec); no data is read here.
train_ds.take(2)

#### Visualize an image example and its corresponding mask from the dataset.

In [None]:
def display(display_list):
    """Show up to three arrays side by side: input image, true mask, predicted mask.

    display_list: sequence of image-like arrays/tensors, in the order
                  [input image, true mask, predicted mask]; shorter lists are allowed.
                  Panels beyond the third are drawn without a title.
    Each item is first converted with tf.keras.utils.array_to_img; if that conversion
    fails, the raw array is handed to plt.imshow instead.
    """
    plt.figure(figsize=(20, 20))

    title = ['Input Image', 'True Mask','Predicted Mask']
    n_panels = len(display_list)

    for i, item in enumerate(display_list):
        plt.subplot(1, n_panels, i+1)
        # Guard the lookup so a fourth panel does not raise IndexError.
        plt.title(title[i] if i < len(title) else '')
        try:
            plt.imshow(tf.keras.utils.array_to_img(item))
        except Exception:
            # Best-effort fallback for inputs array_to_img rejects. `Exception` (not a bare
            # except) lets KeyboardInterrupt / SystemExit propagate.
            plt.imshow(item)
        plt.axis('off')
    plt.show()

In [None]:
# Show the first image/mask of two training batches. After the loop, sample_image and
# sample_mask hold the pair from the last batch; show_predictions() reads them as globals.
for images, masks in train_ds.take(2):
    sample_image, sample_mask = images[0], masks[0]
    print(sample_image.shape)
    print(sample_mask.shape)
    display([sample_image, sample_mask])

In [None]:
from keras import backend as K
from keras.losses import binary_crossentropy
import tensorflow as tf

def dice_coef(y_true, y_pred, smooth=1):
    """Smoothed Dice coefficient computed over all elements of the two tensors at once.

    Returns (2 * sum(y_true * y_pred) + smooth) / (sum(y_true) + sum(y_pred) + smooth).
    `smooth` keeps the ratio defined when both tensors are all zeros.
    """
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    overlap = K.sum(truth * pred)
    denominator = K.sum(truth) + K.sum(pred) + smooth
    return (2. * overlap + smooth) / denominator

def iou_coef(y_true, y_pred, smooth=1):
    """Smoothed intersection-over-union, computed per sample and averaged over axis 0.

    Sums are taken over axes 1, 2 and 3, so both tensors must be 4-D.
    """
    reduce_axes = [1, 2, 3]
    intersection = K.sum(K.abs(y_true * y_pred), axis=reduce_axes)
    total = K.sum(y_true, reduce_axes) + K.sum(y_pred, reduce_axes)
    union = total - intersection
    per_sample_iou = (intersection + smooth) / (union + smooth)
    return K.mean(per_sample_iou, axis=0)

def dice_loss(y_true, y_pred):
    """Dice loss: one minus the smoothed (smooth = 1) Dice score over all elements."""
    smooth = 1.
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    overlap = K.sum(truth * pred)
    dice = (2. * overlap + smooth) / (K.sum(truth) + K.sum(pred) + smooth)
    return 1. - dice

def bce_dice_loss(y_true, y_pred):
    """Binary cross-entropy plus half the Dice loss; y_true is cast to float32 first."""
    target = tf.cast(y_true, tf.float32)
    return binary_crossentropy(target, y_pred) + 0.5 * dice_loss(target, y_pred)

In [None]:
# !pip download segmentation_models

In [None]:
!pip install segmentation_models --no-index --find-links ../input/cell-segmentation

In [None]:
import segmentation_models as sm
# Tell segmentation_models to build its models on tf.keras.
sm.set_framework('tf.keras')
# Last expression of the cell: shows which framework is active.
sm.framework()

In [None]:
# Backbone name used to look up the matching input-preprocessing function.
# NOTE(review): the Unet built later in this notebook is created with 'efficientnetb3',
# not with this BACKBONE value - confirm which encoder the preprocessing should match.
BACKBONE = 'resnet34'
preprocess_input = sm.get_preprocessing(BACKBONE)

In [None]:
# Apply the backbone preprocessing to the image of every batch and leave the masks untouched.
# preprocess_input works on image arrays/tensors; calling it on the tf.data.Dataset object
# itself (as before) never touches the pixel data, so it has to go through Dataset.map.
train_ds = train_ds.map(
    lambda images, masks: (preprocess_input(images), masks),
    num_parallel_calls=AUTO)
valid_ds = valid_ds.map(
    lambda images, masks: (preprocess_input(images), masks),
    num_parallel_calls=AUTO)

In [None]:
from segmentation_models import Unet
from segmentation_models.utils import set_trainable


# U-Net with an ImageNet-pretrained EfficientNet-B3 encoder, producing 3 sigmoid channels.
base_model = Unet('efficientnetb3',input_shape=(256, 256, 3), classes=3, activation='sigmoid',encoder_weights='imagenet')
inp = Input(shape=(256, 256, 3))
l1 = base_model(inp)
# Collapse the 3 U-Net channels into ONE foreground-probability channel.
# The sigmoid is required: bce_dice_loss, dice_coef / iou_coef and the 0.5 threshold used at
# prediction time all treat the output as a probability in [0, 1]; without an activation this
# layer emits unbounded linear values.
out = Conv2D(1, (1, 1), activation='sigmoid')(l1)
model = Model(inp, out, name=base_model.name)
model.compile(optimizer='adam', loss=bce_dice_loss,metrics=[dice_coef,iou_coef,'accuracy']) #bce_dice_loss binary_crossentropy
model.summary()


In [None]:
# try out the model to check what it predicts before training

def create_mask(pred_mask):
    """Threshold predictions at 0.5: values above become 1, everything else 0."""
    return tf.where(pred_mask > 0.5, 1, 0)


def show_predictions(dataset=None, num=1):
    """Plot the input image, true mask and model prediction side by side.

    dataset: batched dataset yielding (images, masks); when given, the first sample of each
             of the first `num` batches is shown with the model's raw prediction for it.
    num: number of batches to take from `dataset`.
    Without a dataset, the module-level sample_image / sample_mask pair is shown together
    with the thresholded prediction from create_mask().
    Uses the module-level `model`.
    """
    if dataset:
        for image, mask in dataset.take(num):
            pred_mask = model.predict(image)
            # predict() returns one mask per image in the batch; show the one that belongs
            # to image[0]. Reshaping the whole batch to (256, 256, 1) only worked for
            # batches holding a single image.
            display([image[0], mask[0], pred_mask[0]])
    else:
        display([sample_image, sample_mask,
                 create_mask(model.predict(sample_image[tf.newaxis, ...])[0])])

        
# Predictions of the still-untrained model on one training batch, as a baseline.
show_predictions(train_ds)


In [None]:

from keras.callbacks import Callback, ModelCheckpoint
# Keep the full model (not just the weights) from the epoch with the lowest validation loss.
checkpoint = ModelCheckpoint(
    'model1.h5', 
    monitor='val_loss', 
    verbose=0, 
    save_best_only=True, 
    save_weights_only=False,
    mode='auto'
)

# Both datasets repeat indefinitely, so steps_per_epoch / validation_steps define how many
# batches make up one epoch.
# NOTE(review): epochs is hard-coded to 10 although an EPOCHS constant exists - confirm.
history = model.fit(
    train_ds,
    validation_data=valid_ds,
    steps_per_epoch=60,
    validation_steps=10,
    callbacks=[checkpoint],
    use_multiprocessing=False,
    workers=4,
    epochs=10,
    verbose = 1
)
hist_df = pd.DataFrame(history.history)
hist_df.to_csv('history.csv')

# PLOT TRAINING: IoU
plt.figure(figsize=(15,5))
plt.plot(history.epoch,history.history['val_iou_coef'],label='Val_iou_coef')
plt.plot(history.epoch,history.history['iou_coef'],label='Trn_iou_coef')
plt.title('IOU'); plt.xlabel('Epoch'); plt.ylabel('iou_coef');plt.legend(); 
plt.show()

# PLOT TRAINING: Dice (this figure used to be an exact copy of the IoU plot, so the
# dice_coef metric tracked during training was never shown)
plt.figure(figsize=(15,5))
plt.plot(history.epoch,history.history['val_dice_coef'],label='Val_dice_coef')
plt.plot(history.epoch,history.history['dice_coef'],label='Trn_dice_coef')
plt.title('Dice'); plt.xlabel('Epoch'); plt.ylabel('dice_coef');plt.legend(); 
plt.show()

# Save the model as it is after the final epoch (the best-val_loss model is in model1.h5).
model.save('./model.h5')


In [None]:
# Load a previously saved model from the Kaggle input directory. compile=False skips restoring
# the loss/metrics, so the custom bce_dice_loss / dice_coef / iou_coef objects are not needed.
loaded_model = load_model('../input/k/pasanjayaweera/cell-segmentation/model.h5', compile = False)

In [None]:
#model.save('./model.h5')

In [None]:
# Run the loaded model on one known training tile and show image, true mask and prediction.
test_img = cv2.imread('../input/sartoriuscellinstancesegmentationmaskpng/TrainImage2x2/0030fd0e6378_3.png')
# Read without IMREAD_GRAYSCALE, unlike the masks in the earlier cells.
true_mask = cv2.imread('../input/sartoriuscellinstancesegmentationmaskpng/TrainMask2x2/0030fd0e6378_3_mask.png')
test_img = cv2.resize(test_img,(256,256))
true_mask = cv2.resize(true_mask,(256,256))
# Add the batch axis expected by predict().
# NOTE(review): the image is passed as read by OpenCV, without the BGR->RGB conversion
# and float32 cast that test_generator applies - confirm this matches training.
test_img1 = np.expand_dims(test_img,axis = 0)
pred_mask = loaded_model.predict(test_img1)
pred_mask = pred_mask.reshape((256,256,1))
display([test_img, true_mask, pred_mask])

#### Prediction and submission

In [None]:
# Full paths of every .png file in the competition's test folder (in os.listdir order).
test_path = '../input/sartorius-cell-instance-segmentation/test/'
test_ids = [  os.path.join(test_path, each)  for each in os.listdir(test_path) if each.endswith('.png')]
def test_generator(image_ids):
    """Yield each test image as a float32 RGB array resized for the model.

    image_ids: iterable of image file paths.
    Yields one array per path: read with OpenCV, converted from BGR to RGB, resized with
    the RESIZE_HEIGHT / RESIZE_WIDTH constants and cast to float32.
    """
    for image_file in image_ids:
        bgr = cv2.imread(image_file)
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        resized = cv2.resize(rgb, (RESIZE_HEIGHT, RESIZE_WIDTH))
        yield resized.astype(np.float32)
        
# test dataset from test data generator 
# Yields single un-batched float32 images of shape (RESIZE_HEIGHT, RESIZE_WIDTH, 3).
test_ds = tf.data.Dataset.from_generator(
    lambda : test_generator(test_ids), 
    output_types=(tf.float32),
    output_shapes=((RESIZE_HEIGHT, RESIZE_WIDTH, 3)) )
# test image ids and predictions
# NOTE(review): predictions come from the in-session `model`, not from `loaded_model`
# loaded in an earlier cell - confirm which one is intended for the submission.
test_predictions = make_predictions(dataset=test_ds, num=len(test_ids), keras_model=model)


In [None]:
# encode predictions in the run-length format
# The cell rebinds test_predictions from masks to strings, so it must not be run twice.
# NOTE(review): masks are encoded at the size make_predictions returns them (presumably the
# model's 256x256 output), one string per image - confirm this matches the resolution and
# per-instance layout the submission expects.
test_predictions = [rle_encode(mask) for mask in test_predictions] 
#print(test_predictions)

In [None]:
# transform full image paths to ids 
# Path(...).stem drops the directory and the file extension. The cell rebinds test_ids,
# so the full paths are no longer available afterwards.
from pathlib import Path
test_ids = [Path(ID).stem for ID in test_ids]

In [None]:
# generate submission data frame: one row per test image id with its encoded prediction,
# sorted by id
submisssion = pd.DataFrame.from_dict({'id': test_ids, 'predicted': test_predictions} )
submisssion = submisssion.sort_values( ['id'], ascending=True )
# '\n' (not the literal letter 'n') so the preview is followed by a blank line
print(submisssion.head(), '\n')
csv_output = os.path.join('./', 'submission.csv') 
submisssion.to_csv(csv_output, index=False)