Hello Fellow Kagglers,

This notebook demonstrates the pretraining process on 4000 LIVECell samples. The LIVECell dataset is retrieved from [this](https://github.com/google/automl/tree/master/efficientnetv2) GitHub page. The dataset of preprocessed samples, which includes horizontal and vertical flips and therefore contains 4000x4=16000 training samples, is created in [this](https://www.kaggle.com/markwijkhuizen/sartorius-livecell-preprocessing) notebook.

This pretraining step should make the subsequent training easier by reducing the distance between the starting weights and the optimal weights. COCO weights are trained on a dataset that is conceptually far from the Sartorius dataset; pretraining on LIVECell should therefore result in better performance.

A custom Mask-RCNN library is used which supports preprocessed LZ4 compressed training samples, allowing for lightning fast training.

In [None]:
# Shut up Tensorflow
!pip install -q silence-tensorflow
import silence_tensorflow.auto

In [None]:
import sys

# Make the bundled EfficientNetV2 and custom Mask R-CNN sources importable
sys.path.extend([
    '../input/sartorius-coco-models/Instance_Segmentation/efficientnetv2',
    '../input/sartorius-coco-models/Instance_Segmentation/Mask_RCNN',
])

In [None]:
# Install LZ4 Compression/Decompression Library
!pip install -q lz4

In [None]:
import matplotlib.pyplot as plt
import mrcnn.utils as utils
import mrcnn.model as modellib
import numpy as np
import pandas as pd
import tensorflow as tf

import os
import sys
import json
import time
import skimage
import imageio
import glob
import imgaug
import multiprocessing
import effnetv2_model

from PIL import Image, ImageDraw
from tqdm.notebook import tqdm
from sklearn.model_selection import KFold
from PIL import Image, ImageEnhance
from mrcnn.config import Config
from mrcnn import visualize

# ignore warnings to make outputs clearer
import warnings
warnings.filterwarnings('ignore')

print(f'Python Version: {sys.version}')
print(f'Tensorflow Version: {tf.__version__}')
print(f'Tensorflow Keras Version: {tf.keras.__version__}')

In [None]:
# Report how many physical GPU devices TensorFlow detects
print('Num GPUs Available: ', len(tf.config.experimental.list_physical_devices('GPU')))

In [None]:
# Preprocessed LIVECell dataframe
train = pd.read_feather('/kaggle/input/sartorius-livecell-preprocessing-dataset/df_processed.feather')

# Every distinct image id, in order of first appearance
id_unique = train['image_id'].unique()


def get_file_path(image_id):
    """Return the path of the original PNG belonging to `image_id`."""
    return f'/kaggle/input/sartorius-livecell-preprocessing-dataset/train/{image_id}/{image_id}.png'


# Original image file path for every row
train['file_path'] = train['image_id'].map(get_file_path)

# Sorted unique cell type names
CELL_TYPES = np.sort(train['cell_type'].unique())
print(f'CELL_TYPES: {CELL_TYPES}')

# Cell type name -> zero-based index into CELL_TYPES
CELL_NAMES_DICT = {cell_name: idx for idx, cell_name in enumerate(CELL_TYPES)}

# Image id -> value of the 'label' column (the last row wins for repeated ids)
ID2CELL_LABEL = {
    image_id: label
    for image_id, label in zip(train['image_id'], train['label'])
}

In [None]:
# Preview the first rows of the preprocessed dataframe
display(train.head())

In [None]:
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in display() only adds a stray "None" to the cell output.
train.info()

In [None]:
# Path to the Mask R-CNN weights pretrained on COCO
COCO_MODEL_PATH = '../input/sartorius-coco-models/mask_rcnn_coco.h5'

# Fall back to downloading the COCO weights when the file is not present
# NOTE(review): the target lies under ../input - confirm that location is writable
if not os.path.exists(COCO_MODEL_PATH):
    utils.download_trained_weights(COCO_MODEL_PATH)

# Training Configuration

In [None]:
# Size of a sample before padding (height, width); pad_image() pads from this size
HEIGHT = 520
WIDTH = 704
SHAPE = (HEIGHT, WIDTH)

# Size used by the model config (IMAGE_MIN_DIM / IMAGE_MAX_DIM / IMAGE_SHAPE)
HEIGHT_TARGET = 576
WIDTH_TARGET = 704
SHAPE_TARGET = (HEIGHT_TARGET, WIDTH_TARGET)

# Images per GPU step and number of unique image ids in the dataframe
BATCH_SIZE = 1
N_SAMPLES = train['image_id'].nunique()
print(f'N_SAMPLES: {N_SAMPLES}')

# Debug mode: only the first DEBUG_SIZE image ids and fewer epochs are used
DEBUG = False
DEBUG_SIZE = 100

In [None]:
# Number of epochs: 2 in debug mode, otherwise 10 scaled by the batch size
EPOCHS_ALL = 2 if DEBUG else (10 * BATCH_SIZE)
print(f'EPOCHS_ALL: {EPOCHS_ALL}')

# Mask-RCNN Config

In [None]:
class CellConfig(Config):
    """Mask R-CNN training configuration for the LIVECell pretraining run.

    Derives from the base Config class and overrides the values that are
    specific to this notebook (image size, backbone, instance limits,
    learning rate, ...).
    """

    NAME = "cell"

    # One GPU processing BATCH_SIZE images per step
    GPU_COUNT = 1
    IMAGES_PER_GPU = BATCH_SIZE
    # NOTE(review): N_SAMPLES counts unique image ids, while CellDataset registers
    # every id once per flip combination (4x) - confirm that an epoch covering
    # a quarter of the registered samples is intended
    STEPS_PER_EPOCH = int(DEBUG_SIZE / BATCH_SIZE)  if DEBUG else int(N_SAMPLES / BATCH_SIZE)

    # Number of classes: one extra (presumably background) + one per cell type
    NUM_CLASSES = 1 + len(CELL_TYPES)

    # Image dimensions: the padded target size, with resizing switched off
    # (assumes the samples on disk already have this size - TODO confirm)
    IMAGE_MIN_DIM = HEIGHT_TARGET
    IMAGE_MAX_DIM = WIDTH_TARGET
    IMAGE_SHAPE = [HEIGHT_TARGET, WIDTH_TARGET, 3]
    IMAGE_RESIZE_MODE = 'none'

    # Backbone name; the notebook checks for the 'efficientnetv2-' prefix later on
    BACKBONE = 'efficientnetv2-b3'

    # Training Structure
    # (1024 and 256 are the values for which no extra COCO layers get excluded
    # when the pretrained weights are loaded further down in this notebook)
    FPN_CLASSIF_FC_LAYERS_SIZE = 1024
    TOP_DOWN_PYRAMID_SIZE = 256
    RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)
    # Regions of interest kept before non-max suppression
    # (meaning as in the upstream Matterport config - confirm for this fork)
    PRE_NMS_LIMIT = 6000
    # Regions of interest kept after non-max suppression
    POST_NMS_ROIS_TRAINING = 2000
    POST_NMS_ROIS_INFERENCE = 2000
    # Instance limits per image
    MAX_GT_INSTANCES = 1000
    TRAIN_ROIS_PER_IMAGE = 500
    DETECTION_MAX_INSTANCES = 500

    # Thresholds (descriptions follow the upstream Matterport config - confirm for this fork)
    RPN_NMS_THRESHOLD = 0.70        # NMS IoU threshold used to filter RPN proposals
    DETECTION_MIN_CONFIDENCE = 0.50 # Minimum confidence to keep a detected instance
    DETECTION_NMS_THRESHOLD = 0.30  # NMS IoU threshold applied to final detections
    ROI_POSITIVE_RATIO = 0.33       # Share of positive ROIs used to train the heads

    # Mini Mask
    USE_MINI_MASK = True
    MINI_MASK_SHAPE = (112, 112)
    MASK_SHAPE = (56, 56)

    # DO NOT train Batch Normalization because of small batch size
    # There are too few samples per batch to correctly train the normalization
    TRAIN_BN = False

    # Learning Rate
    # NOTE(review): N_WARMUP_STEPS and LR_SCHEDULE are presumably read by the
    # custom library's learning rate scheduler - confirm units (steps vs epochs)
    LEARNING_RATE = 0.002
    WEIGHT_DECAY = 0.0
    N_WARMUP_STEPS = 2
    LR_SCHEDULE = True

    # Dataloader Queue Size (was set to 100 but resulted in OOM error)
    MAX_QUEUE_SIZE = 10

    # Cache Items
    CACHE = True

    # Mirror the notebook-level DEBUG flag on the config
    DEBUG = DEBUG

    # Number of data loading workers
    # NOTE(review): 0 presumably means loading happens in the main process - confirm
    WORKERS = 0

    # Losses (all weighted equally)
    LOSS_WEIGHTS = {
        'rpn_class_loss': 1.0,    # RPN anchor classifier loss (Foreground/Background)
        'rpn_bbox_loss': 1.0,     # RPN bounding box loss (bbox of generic object)
        'mrcnn_class_loss': 1.0,  # loss for the classifier head of Mask R-CNN (Background / specific class)
        'mrcnn_bbox_loss': 1.0,   # loss for Mask R-CNN bounding box refinement
        'mrcnn_mask_loss': 1.0,   # mask binary cross-entropy loss for the masks head
    }

    # NOTE(review): presumably toggles XLA compilation inside the custom library - confirm
    ENABLE_XLA = True

# Instantiate the configuration and print all of its values
config = CellConfig()
config.display()

# Dataset

In [None]:
def pad_image(image, constant_values, shape=None, target_shape=None):
    """Pad an array along its first two axes from `shape` up to `target_shape`.

    Args:
        image: array whose first two axes are (height, width); any trailing
            axes (e.g. channels) are left unpadded.
        constant_values: fill value forwarded to ``np.pad``.
        shape: (height, width) of the unpadded input. Defaults to the
            notebook-level (HEIGHT, WIDTH).
        target_shape: (height, width) after padding. Defaults to the
            notebook-level (HEIGHT_TARGET, WIDTH_TARGET).

    Returns:
        The padded array. For an input of size `shape` the first two axes
        have exactly size `target_shape`.

    Raises:
        ValueError: raised by ``np.pad`` when the target is smaller than the
            source (negative padding).
    """
    src_h, src_w = (HEIGHT, WIDTH) if shape is None else shape
    dst_h, dst_w = (HEIGHT_TARGET, WIDTH_TARGET) if target_shape is None else target_shape

    # Split the padding as evenly as possible. When the difference is odd the
    # extra row/column goes to the bottom/right, so the result still matches
    # the target size (using `// 2` on both sides would lose one pixel).
    pad_top = (dst_h - src_h) // 2
    pad_bottom = (dst_h - src_h) - pad_top
    pad_left = (dst_w - src_w) // 2
    pad_right = (dst_w - src_w) - pad_left

    pad_width = [(pad_top, pad_bottom), (pad_left, pad_right)]
    # Trailing axes (channels, instances, ...) are never padded
    pad_width += [(0, 0)] * (image.ndim - 2)

    return np.pad(image, pad_width, constant_values=constant_values)

In [None]:
# Copy Train Dataset to Working Directory
!cp -r '/kaggle/input/sartorius-livecell-preprocessing-dataset/train' './'

# Copy Preprocessed LZ4 Files
!cp -r '/kaggle/input/sartorius-livecell-preprocessing-dataset/temp' './'

In [None]:
class CellDataset(utils.Dataset):
    """Dataset of preprocessed cell samples stored on disk.

    Each sample lives in ``./<image_group>/<image_id>/`` and consists of the
    image ``<image_id>.png`` plus the archives ``<image_id>_masks.npz`` and
    ``<image_id>_class_ids.npz`` (both storing their array under key 'v').
    """

    def load_data(self, image_ids, form, image_group):
        """Register the cell classes and every image once per flip combination.

        Args:
            image_ids: iterable of image ids to register.
            form: unused; kept so existing callers keep working.
            image_group: sub-directory of the working directory that holds
                the samples (e.g. 'train').
        """
        self.image_group = image_group

        # Class ids start at 1, one per entry of CELL_TYPES
        for i, name in enumerate(CELL_TYPES):
            self.add_class('cell', 1 + i, name)

        # Add every image four times, once per (vertical, horizontal) flip
        # combination, using the base method from utils.Dataset.
        # NOTE(review): load_mask() below ignores the flip flags - presumably
        # the custom library applies the flips itself; confirm.
        for vf in [True, False]:
            for hf in [True, False]:
                for image in tqdm(image_ids):
                    self.add_image(
                        'cell',
                        image_id=image,
                        path=f'./{image_group}/{image}/{image}.png',
                        label=ID2CELL_LABEL[image],
                        # NOTE(review): 512x512 matches neither HEIGHT/WIDTH nor
                        # the padded target size - confirm this metadata is unused
                        height=512, width=512,
                        vertical_flip=vf, horizontal_flip=hf,
                    )

    def load_mask(self, image_id):
        """Load the instance masks and class ids for the given image.

        MaskRCNN expects masks in the form of a bitmap [height, width, instances].

        Args:
            image_id: index into ``self.image_info`` of the image to load
                masks for.
        Returns:
            masks: the array stored in ``<id>_masks.npz`` (expected to be a
                bool array of shape [height, width, instance count]).
            class_ids: the array stored in ``<id>_class_ids.npz`` (expected
                to be a 1D array with one class id per instance mask).
        """
        info = self.image_info[image_id]
        sample_id = info['id']
        sample_dir = f'./{self.image_group}/{sample_id}'

        # NpzFile keeps the archive open until it is closed; the context
        # manager releases the file handle as soon as the array is read.
        with np.load(f'{sample_dir}/{sample_id}_masks.npz') as archive:
            masks = archive['v']

        with np.load(f'{sample_dir}/{sample_id}_class_ids.npz') as archive:
            class_ids = archive['v']

        return masks, class_ids

In [None]:
# Restrict to the first DEBUG_SIZE image ids when running in debug mode
train_image_ids = id_unique[:DEBUG_SIZE] if DEBUG else id_unique

# Register the samples of the 'train' folder and finalise the dataset
dataset_train = CellDataset()
dataset_train.load_data(train_image_ids, 'png', 'train')
dataset_train.prepare()

In [None]:
dataset = dataset_train

# Preview up to 10 distinct samples. Sampling without replacement avoids
# showing the same sample twice; the size is capped so that small (debug)
# datasets do not raise.
n_preview = min(10, len(dataset.image_ids))
image_ids = np.random.choice(dataset.image_ids, n_preview, replace=False)

for image_id in tqdm(image_ids):
    image = dataset.load_image(image_id)
    mask, class_ids = dataset.load_mask(image_id)
    # Show the image next to its single most prominent class mask
    visualize.display_top_masks(image, mask, class_ids, dataset.class_names, limit=1)

# Model

In [None]:
# Create model in training mode
# Create the checkpoint directory from Python so that re-running this cell
# does not fail with "File exists" the way a bare `mkdir` does.
os.makedirs('model_checkpoints', exist_ok=True)

# Create model in training mode
model = modellib.MaskRCNN(mode='training', config=config, model_dir='model_checkpoints')

In [None]:
init_with = "coco"

# Layers that are never initialised from the COCO checkpoint
exclude = ["mrcnn_class_logits", "mrcnn_bbox_fc", "mrcnn_bbox", "mrcnn_mask"]

# EfficientNetV2 backbone: additionally skip the FPN lateral layers
if 'efficientnetv2-' in config.BACKBONE:
    exclude.extend(["fpn_c5p5", "fpn_c4p4", "fpn_c3p3", "fpn_c2p2"])

# Modified Fully Connected Layer: skip the classifier head layers
fc_size_modified = config.FPN_CLASSIF_FC_LAYERS_SIZE != 1024
if fc_size_modified:
    print('Excluding FC layer')
    exclude.extend([
        "mrcnn_class_conv1", "mrcnn_class_bn1", "mrcnn_class_conv2", "mrcnn_class_bn2",
    ])

# Modified Top Down Pyramid Size: skip the FPN outputs, the RPN, the first
# classifier conv layer and the whole mask head
pyramid_size_modified = config.TOP_DOWN_PYRAMID_SIZE != 256
if pyramid_size_modified:
    print('Excluding Top Down Pyramid Layers')
    exclude.extend([
        "fpn_p2", "fpn_p3", "fpn_p4", "fpn_p5",
        "rpn_model",
        "mrcnn_class_conv1",
        "mrcnn_mask_conv1", "mrcnn_mask_conv2", "mrcnn_mask_conv3", "mrcnn_mask_conv4",
        "mrcnn_mask_bn1", "mrcnn_mask_bn2", "mrcnn_mask_bn3", "mrcnn_mask_bn4",
        "mrcnn_mask_deconv",
    ])

# Initialise all remaining layers from the COCO weights, matched by layer name
model.load_weights(COCO_MODEL_PATH, by_name=True, exclude=exclude)

In [None]:
# Load EfficientNetV2 Weights Pretrained on Imagenet21K and Fine Tuned on ImageNet1K
# The weights file is selected by the configured backbone name.
# NOTE(review): assumes keras_model.layers[1] is the EfficientNetV2 backbone - confirm
model.keras_model.layers[1].load_weights(f'/kaggle/input/sartorius-coco-models/Instance_Segmentation/efficientnetv2_model_checkpoints/{config.BACKBONE}-imagenet21k-ft1k.h5')

# Model Summary

In [None]:
# Show the model summary (presumably a layer overview from the custom library)
model.show_summary()

# Learning Rate Scheduler

In [None]:
# Plot the learning rate schedule over all EPOCHS_ALL training epochs
model.plot_lr_schedule(EPOCHS_ALL)

# Training Whole Model

In [None]:
# Settings for training every layer, without extra augmentation
train_kwargs = dict(
    learning_rate=config.LEARNING_RATE,
    epochs=EPOCHS_ALL,
    layers="all",
    augmentation=None,
)

# Train on the training set only (no validation dataset is passed) and time the run
start_train = time.time()
history = model.train(dataset_train, None, **train_kwargs)
end_train = time.time()

minutes = round((end_train - start_train) / 60, 2)
print(f'Training took {minutes} minutes')

# Training History

In [None]:
def plot_history_metric(metric, f_best=np.argmax):
    """Plot one training metric per epoch and highlight its best epoch.

    Args:
        metric: key into the notebook-level ``history.history`` dict.
        f_best: function returning the index of the best value
            (``np.argmax`` by default; pass ``np.argmin`` for losses).
    """
    series = history.history[metric]
    plt.figure(figsize=(15, 8))
    n_epochs = len(series)
    epochs = np.arange(1, n_epochs + 1)

    # Tick positions: every epoch for short runs, otherwise 1, 5, 10, 15, ...
    if n_epochs > 20:
        tick_positions = [1, 5] + list(range(10, n_epochs + 1, 5))
    else:
        tick_positions = epochs

    # Metric curve with a marker on the best epoch (epochs are 1-based)
    plt.plot(epochs, series, label='train')
    best_idx = f_best(series)
    plt.scatter(best_idx + 1, series[best_idx], color='red', s=75, marker='o', label='train_best')

    # Titles, axis labels, ticks, legend and grid
    plt.title(f'Model {metric}', fontsize=24, pad=10)
    plt.ylabel(metric, fontsize=20, labelpad=10)
    plt.xlabel('epoch', fontsize=20, labelpad=10)
    plt.tick_params(axis='x', labelsize=8)
    plt.xticks(tick_positions, fontsize=16)
    plt.yticks(fontsize=16)
    plt.legend(prop={'size': 18})
    plt.grid()

In [None]:
# Overall 'loss' metric per epoch; lower is better, so the minimum is marked
plot_history_metric('loss', f_best=np.argmin)

In [None]:
# Region Proposal Network Foreground / Background Classifier Loss
plot_history_metric('rpn_class_loss', f_best=np.argmin)

In [None]:
# Region Proposal Network Bounding Box Loss (lower is better)
plot_history_metric('rpn_bbox_loss', f_best=np.argmin)

In [None]:
# Mask RCNN Head Classifier Loss (Background / specific class)
plot_history_metric('mrcnn_class_loss', f_best=np.argmin)

In [None]:
# Mask RCNN Head Bounding Box Loss
plot_history_metric('mrcnn_bbox_loss', f_best=np.argmin)

In [None]:
# Mask RCNN Head Object Mask Binary Cross Entropy Loss (lower is better)
plot_history_metric('mrcnn_mask_loss', f_best=np.argmin)

# Inference

In [None]:
# Inference-time overrides on top of the training configuration (CellConfig)
class InferenceConfig(CellConfig):
    # The prediction loop in this notebook passes a single image to detect()
    IMAGES_PER_GPU = 1
    # Same value as in CellConfig, restated for the inference setup
    DETECTION_MAX_INSTANCES = 500
    # Stricter than the 0.50 used in CellConfig
    DETECTION_MIN_CONFIDENCE = 0.70    

# Instantiate the inference configuration and print all of its values
inference_config = InferenceConfig()
inference_config.display()

In [None]:
# Recreate the model in inference mode
# Rebinds `model`: from here on it refers to the inference-mode model, which
# uses the same checkpoint directory as the training model
model = modellib.MaskRCNN(mode="inference", config=inference_config, model_dir='model_checkpoints')

In [None]:
# For EfficientNetV2 backbones, mark the last layer of the nested model at
# keras_model.layers[1] as non-trainable.
# NOTE(review): presumably layers[1] is the backbone - confirm why this is needed
if 'efficientnetv2-' in  inference_config.BACKBONE:
    model.keras_model.layers[1].layers[-1].trainable = False

In [None]:
# Locate the most recent checkpoint under model_dir
# (presumably the one written by the training run in this notebook)
model_path = model.find_last()

# Load the trained weights into the inference model, matching layers by name
print('Loading weights from', model_path)
model.load_weights(model_path, by_name=True)

# Visualize Train Predictions

In [None]:
# Compare ground-truth masks with predictions on a subset of the train images.
# glob() returns files in arbitrary order, so sort before slicing to make the
# selected 25 images reproducible across runs.
for file_path in sorted(glob.glob('./train/*/*.png'))[:25]:
    img = skimage.io.imread(file_path)
    # Replicate the image into 3 identical channels (assumes a single-channel PNG)
    img = np.stack((img, img, img), axis=2)
    results = model.detect([img], verbose=1)
    r = results[0]

    # Image Id: file name up to the first dot
    image_id = os.path.basename(file_path).split('.')[0]
    print(f'image_id: {image_id}')

    # Ground truth: collapse the instance axis into a single overlap-count map.
    # The context manager closes the .npz archive right after reading.
    with np.load(f'./train/{image_id}/{image_id}_masks.npz') as archive:
        mask = archive['v'].sum(axis=2)

    plt.figure(figsize=(16,16))
    plt.imshow(mask)
    plt.show()

    # Predictions: boxes, masks, class names and scores drawn on the image
    visualize.display_instances(
        img,
        r['rois'],
        r['masks'],
        r['class_ids'],
        ['BG'] + CELL_TYPES.tolist(),
        r['scores'],
        figsize=(16,16)
    )

In [None]:
# Clear All Cached Training Samples
model.clear_cache()
# Clear all train images in working directory
!rm -rf train