Hello fellow Kagglers,

This notebook demonstrates a first attempt at segmenting cell pixels using [this](https://www.kaggle.com/markwijkhuizen/sartorius-preprocessing-kfolds-public) training data.

The training process only classifies each pixel as cell or not cell; it does not segment instances. The predicted masks could, however, be used as input to a separate instance segmentation step, isolating the instance segmentation task from the pixel-level segmentation task.

The model is inspired by the upsampling architecture explained in this awesome [Tensorflow Image Segmentation](https://www.tensorflow.org/tutorials/images/segmentation) tutorial.

**Update V2**

- All data is used for training, since the train/val metrics are approximately equal and usage of all data for training should result in a better performing model.
- Train images do not have enhanced contrast for better performance.
- Changed backbone model from EfficientNetV2-B1 to EfficientNetV2-S
- The segmentation mask is used to compute the confidence levels of instances in [this](https://www.kaggle.com/markwijkhuizen/sartorius-mask-rcnn-efficientnetv2-inference) inference notebook (e.t.a. 05-12-2021)

In [None]:
# Silence Tensorflow
!pip install -q silence-tensorflow
import silence_tensorflow.auto

In [None]:
# Load the EfficientNetV2 Library
# Source: https://github.com/google/automl/tree/master/efficientnetv2
import sys
sys.path.append('/kaggle/input/efficientnetv2-pretrained-imagenet21k-weights/brain_automl/')
sys.path.append('/kaggle/input/efficientnetv2-pretrained-imagenet21k-weights/brain_automl/efficientnetv2/')

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

from tensorflow.keras.mixed_precision import experimental as mixed_precision
from kaggle_datasets import KaggleDatasets
from tqdm.notebook import tqdm
from multiprocessing import cpu_count
from sklearn import metrics

import effnetv2_model
import re
import os
import io
import time
import pickle
import math
import random
import sys
import cv2
import gc

# Log library and interpreter versions so the run can be reproduced later
print(f'tensorflow version: {tf.__version__}')
print(f'tensorflow keras version: {tf.keras.__version__}')
# A stray "P" used to be printed in front of the interpreter version
print(f'python version: {sys.version}')

In [None]:
# Seed every random number generator used in this notebook
def seed_everything(seed):
    """Seed Python's `random`, NumPy and TensorFlow with `seed`.

    Also exports `seed` as the PYTHONHASHSEED environment variable.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    # One seeding call per library, in the same order as before
    for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
        seed_fn(seed)


seed_everything(42)

In [None]:
# Debug switch; NOTE(review): this flag is not read anywhere in the visible notebook
DEBUG = False

# Original image dimensions
HEIGHT = 520
WIDTH = 704
# Image size after padding, both sides divisible by 32 for correct upsampling
HEIGHT_TARGET = 544
WIDTH_TARGET = 704
# NOTE(review): get_model builds a single-channel input; N_CHANNELS is only used by the
# first `process_image` definition and INPUT_SHAPE is not read in the visible cells
N_CHANNELS = 3
INPUT_SHAPE = (HEIGHT_TARGET, WIDTH_TARGET, N_CHANNELS)

# EfficientNet version, b0/b1/b2/b3/s/m/l/xl
EFN_SIZE = 's'

# Batch size
BATCH_SIZE = 8

# Peak learning rate of the schedule
LR_MAX = 0.2
# Number of Keras epochs (the fit call runs 10 passes over the data per epoch)
EPOCHS = 20

# Whether to use all data for training (train + val split merged, validation disabled)
USE_ALL_TRAINING_DATA = True

# Tensorflow AUTO flag
AUTO = tf.data.experimental.AUTOTUNE

print(f'BATCH_SIZE: {BATCH_SIZE}')

# Helper Functions

In [None]:
# Random scalar integer
@tf.function()
def tf_rand_int(minval, maxval):
    """Draw a single int64 scalar with tf.random.uniform between `minval` and `maxval`.

    Both bounds are cast to int64 before sampling.
    """
    low = tf.cast(minval, tf.int64)
    high = tf.cast(maxval, tf.int64)
    return tf.random.uniform((), low, high, tf.int64)

# Chance of 1 in K
@tf.function()
def one_in(k):
    """Return a boolean tensor that is True when a draw from tf_rand_int(0, k) equals 0."""
    draw = tf_rand_int(0, k)
    return 0 == draw

# Model

In [None]:
# Inspiration: https://www.tensorflow.org/tutorials/generative/pix2pix#build_an_input_pipeline_with_tfdata
def upsample(x, concat, filters, size, name, dropout=0.0):
    """Decoder block: stride-2 transposed convolution, batch norm, optional dropout, ReLU, skip concat.

    Args:
        x: Tensor to upsample.
        concat: Skip-connection tensor concatenated onto the upsampled result.
        filters: Number of filters of the transposed convolution.
        size: Kernel size of the transposed convolution.
        name: Suffix appended to every layer name in the block.
        dropout: Dropout rate; the dropout layer is only created when this is > 0.

    Returns:
        The concatenation of the upsampled tensor and `concat`.
    """
    layers = tf.keras.layers

    deconv = layers.Conv2DTranspose(
        filters,
        size,
        strides=2,
        padding='SAME',
        kernel_initializer=tf.random_normal_initializer(0., 0.02),
        use_bias=False,  # weights only, no bias term
        name=f'Conv2DTranspose_{name}',
    )
    x = deconv(x)
    x = layers.BatchNormalization(name=f'BatchNormalization_{name}')(x)

    if dropout > 0.0:
        x = layers.Dropout(dropout, name=f'Dropout_{name}')(x)

    # The 'ReLy' spelling is part of the layer name and is kept unchanged on purpose
    x = layers.ReLU(name=f'ReLy_{name}')(x)
    return layers.Concatenate(name=f'Concatenate_{name}')([x, concat])

In [None]:
def get_model(dropout=0.00, file_path=None):
    """Build and compile the EfficientNetV2 encoder with a transposed-convolution decoder.

    Side effects: clears the Keras session and enables XLA JIT globally.

    Args:
        dropout: Dropout rate forwarded to every `upsample` block (0 disables dropout).
        file_path: Optional weights file restored with `model.load_weights`.

    Returns:
        A compiled `tf.keras.Model` taking a (HEIGHT_TARGET, WIDTH_TARGET, 1) image
        and producing a single-channel sigmoid output.
    """
    tf.keras.backend.clear_session()
    # Enable XLA optimizations
    tf.config.optimizer.set_jit(True)

    # EfficientNetV2 backbone; weights=None is passed, so presumably no pretrained
    # checkpoint is loaded here (confirm against effnetv2_model.get_model)
    cnn = effnetv2_model.get_model(f'efficientnetv2-{EFN_SIZE}', include_top=False, weights=None)
    cnn.trainable = True

    # Input, note the name is equal to the dictionary key in the dataset
    grayscale_image = tf.keras.layers.Input([HEIGHT_TARGET, WIDTH_TARGET, 1], name='image', dtype=tf.float32)

    # 1x1 convolution maps the single grayscale channel to the 3 channels fed to the backbone
    rgb_image = tf.keras.layers.Conv2D(3, kernel_size=1, strides=1)(grayscale_image)
    # The final embedding is only printed; the intermediate endpoints drive the decoder
    embedding, up5, up4, up3, up2, up1 = cnn(rgb_image, with_endpoints=True)
    print(f'embedding shape: {embedding.shape} up1 shape: {up1.shape}, up2 shape: {up2.shape}')
    print(f'up3 shape: {up3.shape}, up4 shape: {up4.shape}, up5 shape: {up5.shape}')

    # Decoder: each block upsamples by 2 and concatenates the matching encoder endpoint
    x = upsample(up1, up2, up2.shape[-1] * 2, 3, 'upsample1_17x22', dropout=dropout)
    x = upsample(x, up3, up3.shape[-1] * 2, 3, 'upsample2_34x44', dropout=dropout)
    x = upsample(x, up4, up4.shape[-1] * 2, 3, 'upsample3_68x88', dropout=dropout)
    x = upsample(x, up5, up5.shape[-1] * 2, 3, 'upsample4_136x176', dropout=dropout)

    # Final stride-2 upsampling to a single sigmoid channel (per-pixel cell probability)
    output = tf.keras.layers.Conv2DTranspose(
            filters=1,
            kernel_size=3,
            strides=2,
            padding='same',
            activation='sigmoid'
        )(x)

    # SGD optimizer; the learning rate is set per epoch by the LearningRateScheduler callback.
    # NOTE(review): nesterov=True with momentum=0.00 behaves like plain SGD; confirm intended.
    optimizer = tf.keras.optimizers.SGD(nesterov=True, momentum=0.00)

    # Binary cross entropy on probabilities: the output layer already applies a sigmoid
    loss = tf.keras.losses.BinaryCrossentropy()

    # Metrics (named `metric_list` so the imported sklearn `metrics` module is not shadowed)
    metric_list = [
        tf.keras.metrics.AUC(),
        tf.keras.metrics.BinaryAccuracy(),
        tf.keras.metrics.Precision(),
        tf.keras.metrics.Recall(),
    ]

    model = tf.keras.models.Model(inputs=grayscale_image, outputs=output)
    model.compile(optimizer=optimizer, loss=loss, metrics=metric_list)

    if file_path:
        print('Loading pretrained weights...')
        model.load_weights(file_path)

    return model

In [None]:
# Build and compile the model with a dropout rate of 0.10 in the upsample blocks; no weights file is restored
model = get_model(dropout=0.10, file_path=None)

In [None]:
# Print the model summary
model.summary()

In [None]:
# Take a good look at the model architecture
# The upsampling blocks are concatenated with the CNN endpoints (skip connections)
tf.keras.utils.plot_model(model, show_shapes=True, show_dtype=True, show_layer_names=True, expand_nested=False)

# Datasets

In [None]:
def benchmark_dataset(dataset, num_epochs=3, n_steps_per_epoch=25, bs=BATCH_SIZE):
    """Time how fast `dataset` yields batches and print the throughput per epoch.

    The first batch of every epoch is treated as warm-up: the clock starts once it
    has arrived, and only the batches after it are timed.

    Args:
        dataset: Object exposing `.take(n)` that yields `(images, labels)` batches.
        num_epochs: Number of timing rounds.
        n_steps_per_epoch: Number of timed batches requested per round.
        bs: Batch size, used to convert steps/s into images/s.
    """
    for epoch_num in range(num_epochs):
        epoch_start = None
        n_timed_steps = 0
        for idx, (images, labels) in enumerate(dataset.take(n_steps_per_epoch + 1)):
            if idx == 0:
                # Warm-up batch received, start the clock
                epoch_start = time.perf_counter()
                continue
            n_timed_steps = idx
            if idx == 1 and epoch_num == 0:
                print(f'image shape: {images.shape}, image dtype: {images.dtype}')

        # Nothing to report when the dataset produced fewer than two batches
        if n_timed_steps == 0:
            print(f'epoch {epoch_num}: fewer than 2 batches available, nothing to time')
            continue

        epoch_t = time.perf_counter() - epoch_start
        # Divide by the number of batches actually timed, not the number requested
        step_t = epoch_t / n_timed_steps
        mean_step_t = round(step_t * 1000, 1)
        # Use the unrounded step time so a very fast pipeline cannot divide by zero
        n_imgs_per_s = int(bs / step_t) if step_t > 0 else 0
        print(f'epoch {epoch_num} took: {round(epoch_t, 2)} sec, mean step duration: {mean_step_t}ms, images/s: {n_imgs_per_s}')

In [None]:
# Normalize and pad the image and label
# NOTE(review): the next cell defines `process_image` again; when the notebook is run top to
# bottom that later definition replaces this one, so this version appears to be unused.
def process_image(image, label):
    """Normalise and pad one image/mask pair, repeating the image to N_CHANNELS channels.

    Unlike the later `process_image` definition, this version scales by 0.108 (not 0.589),
    pads after normalising without an explicit pad value, applies no flips, and returns an
    N_CHANNELS-channel image.

    Returns:
        Tuple `(image, label)`, each with an axis added at position 2.
    """
    # Padding: split the height/width difference evenly over both sides
    pad_h = (HEIGHT_TARGET - HEIGHT) // 2
    pad_w = (WIDTH_TARGET - WIDTH) // 2
    paddings = [[pad_h, pad_h], [pad_w, pad_w]]
    
    # Image: centre around 128, scale, pad, then add and repeat the channel axis
    image = tf.cast(image, tf.float32)
    image = ((image - 128) / 128) / 0.108
    image = tf.pad(image, paddings=paddings)
    image = tf.expand_dims(image, axis=2)
    image = tf.repeat(image, repeats=N_CHANNELS, axis=2)
    
    # Label: pad and add the channel axis
    label = tf.pad(label, paddings=paddings)
    label = tf.expand_dims(label, axis=2)
    
    return image, label

In [None]:
@tf.function(experimental_compile=True)
def process_image(image, label):
    """Training preprocessing: pad, add a channel axis, randomly flip, then normalise.

    The image is padded with 128 and the label with 0. Each flip is applied to image
    and label together when `one_in(2)` is true.

    Returns:
        Tuple `(image, label)`; the image is float32 and scaled as ((x - 128) / 128) / 0.589.
    """
    # Symmetric padding up to the target size
    pad_rows = (HEIGHT_TARGET - HEIGHT) // 2
    pad_cols = (WIDTH_TARGET - WIDTH) // 2
    pad_spec = [[pad_rows, pad_rows], [pad_cols, pad_cols]]

    # Pad, then append the channel axis
    image = tf.expand_dims(tf.pad(image, paddings=pad_spec, constant_values=128), axis=2)
    label = tf.expand_dims(tf.pad(label, paddings=pad_spec, constant_values=0), axis=2)

    # Horizontal flip
    if one_in(2):
        image = tf.image.flip_left_right(image)
        label = tf.image.flip_left_right(label)

    # Vertical flip
    if one_in(2):
        image = tf.image.flip_up_down(image)
        label = tf.image.flip_up_down(label)

    # Normalise
    image = (tf.cast(image, tf.float32) - 128) / 128
    image = image / 0.589

    return image, label

In [None]:
# Plots a batch of images
def show_batch(dataset, rows=4, cols=4):
    """Plot image/mask pairs from the first batch of `dataset`.

    Every grid row holds `cols // 2` pairs: the image on an even column and its mask on
    the column to its right. The batch must contain at least `rows * (cols // 2)` samples.

    Args:
        dataset: Iterable yielding `(images, labels)` batches; images are expected to be
            normalised as ((x - 128) / 128) / 0.589.
        rows: Number of grid rows.
        cols: Number of grid columns.
    """
    imgs, lbls = next(iter(dataset))
    imgs = imgs.numpy()
    # De-normalise; round and clip so float error cannot shift or wrap the uint8 values
    imgs = np.clip(np.rint((imgs * 0.589) * 128 + 128), 0, 255).astype(np.uint8)
    # figsize is (width, height): width follows the columns, height follows the rows.
    # squeeze=False keeps `axes` 2-D even for a single row or column.
    pairs_per_row = cols // 2
    fig, axes = plt.subplots(nrows=rows, ncols=cols, figsize=(cols*6, rows*4), squeeze=False)
    for r in range(rows):
        for c in range(pairs_per_row):
            # Consecutive samples, so no part of the batch is skipped
            sample_idx = r * pairs_per_row + c
            axes[r, c*2].imshow(imgs[sample_idx])
            axes[r, c*2+1].imshow(lbls[sample_idx])

# Train Dataset

In [None]:
def get_train_dataset(fold, bs=BATCH_SIZE, print_shape=False, return_steps=False, repeat=True):
    """Build the shuffled, augmented and batched training pipeline for `fold`.

    Args:
        fold: Fold index used in the .npz file names.
        bs: Batch size.
        print_shape: Print the shapes of the loaded arrays when True.
        return_steps: Also return the number of batches in one pass over the data.
        repeat: Repeat the dataset indefinitely; disable to iterate exactly once.

    Returns:
        The dataset, or `(dataset, steps)` when `return_steps` is True.
    """
    options = tf.data.Options()
    options.experimental_deterministic = False

    X = np.load(f'/kaggle/input/sartorius-kfolds/X_fold_{fold}_train.npz')['v']
    y = np.load(f'/kaggle/input/sartorius-kfolds/y_fold_{fold}_train.npz')['v']

    # Use all training data: append the validation split of the same fold
    if USE_ALL_TRAINING_DATA:
        X_val = np.load(f'/kaggle/input/sartorius-kfolds/X_fold_{fold}_val.npz')['v']
        y_val = np.load(f'/kaggle/input/sartorius-kfolds/y_fold_{fold}_val.npz')['v']
        X, y = np.concatenate((X, X_val)), np.concatenate((y, y_val))

    if print_shape:
        print(f'X shape: {X.shape}, y shape: {y.shape}')

    pipeline = (
        tf.data.Dataset.from_tensor_slices((X, y))
        .with_options(options)
        .shuffle(len(y), reshuffle_each_iteration=True)
    )
    # Don't repeat when a single pass over the training data is wanted
    if repeat:
        pipeline = pipeline.repeat()
    pipeline = pipeline.map(process_image, num_parallel_calls=1).batch(bs).prefetch(1)

    n_steps = math.ceil(len(X) / bs)
    return (pipeline, n_steps) if return_steps else pipeline

In [None]:
# Benchmark the fold-0 training pipeline to check that the
# dataloader won't form a bottleneck
benchmark_dataset(get_train_dataset(0))

In [None]:
# Show Image and Label Statistics as Sanity Check
# Pulls a single batch from the fold-0 training pipeline and reports shapes, dtypes and value statistics
images, labels = next(iter(get_train_dataset(0, print_shape=True)))
print(f'images shape: {images.shape}, labels shape: {labels.shape}')
print(f'images dtype: {images.dtype}, labels dtype: {labels.dtype}')
print(f'images min: {np.min(images):.2f}, max: {np.max(images):.2f}')
print(f'images mean: {np.mean(images):.2f}, std: {np.std(images):.2f}')

In [None]:
# Plot some training images next to their masks, taken from one batch of 32 samples
show_batch(get_train_dataset(0, bs=32))

# Val Dataset

In [None]:
@tf.function(experimental_compile=True)
def process_image_val(image, label):
    """Validation preprocessing: pad and normalise, without any augmentation.

    The image is padded with 128, given a channel axis, cast to float32 and scaled as
    ((x - 128) / 128) / 0.589. The label is only padded with 0; no channel axis is added.

    Returns:
        Tuple `(image, label)`.
    """
    # Symmetric padding up to the target size
    pad_rows = (HEIGHT_TARGET - HEIGHT) // 2
    pad_cols = (WIDTH_TARGET - WIDTH) // 2
    pad_spec = [[pad_rows, pad_rows], [pad_cols, pad_cols]]

    # Image
    padded_image = tf.pad(image, paddings=pad_spec, constant_values=128)
    image = tf.cast(tf.expand_dims(padded_image, axis=2), tf.float32)
    image = ((image - 128) / 128) / 0.589

    # Label
    label = tf.pad(label, paddings=pad_spec, constant_values=0)

    return image, label

In [None]:
def get_val_dataset(fold, bs=BATCH_SIZE, print_shape=False, return_steps=False):
    """Build the batched validation pipeline for `fold`.

    Args:
        fold: Fold index used in the .npz file names.
        bs: Batch size.
        print_shape: Print the shapes of the loaded arrays when True.
        return_steps: Also return the number of batches in the dataset.

    Returns:
        The dataset, or `(dataset, steps)` when `return_steps` is True. When
        USE_ALL_TRAINING_DATA is set there is no validation split, so `None`
        (or `(None, 0)`) is returned instead.
    """
    if USE_ALL_TRAINING_DATA:
        return (None, 0) if return_steps else None

    X = np.load(f'/kaggle/input/sartorius-kfolds/X_fold_{fold}_val.npz')['v']
    y = np.load(f'/kaggle/input/sartorius-kfolds/y_fold_{fold}_val.npz')['v']
    if print_shape:
        print(f'X shape: {X.shape}, y shape: {y.shape}')

    val_dataset = tf.data.Dataset.from_tensor_slices((X, y))
    # The previously referenced NUM_PARALLEL_CALLS was never defined (NameError);
    # use the notebook's AUTO (tf.data AUTOTUNE) setting instead
    val_dataset = val_dataset.map(process_image_val, num_parallel_calls=AUTO)
    val_dataset = val_dataset.batch(bs)
    val_dataset = val_dataset.prefetch(1)

    if return_steps:
        # Round up so the final, partial batch is counted as well
        return val_dataset, math.ceil(len(X) / bs)
    else:
        return val_dataset

In [None]:
# Show Image and Label Statistics as Sanity Check
# Skipped when all data is used for training, because get_val_dataset then returns None
if not USE_ALL_TRAINING_DATA:
    images_val, labels_val = next(iter(get_val_dataset(0, print_shape=True)))
    print(f'images_val shape: {images_val.shape}, labels_val shape: {labels_val.shape}')
    print(f'images_val dtype: {images_val.dtype}, labels_val dtype: {labels_val.dtype}')
    print(f'images min: {np.min(images_val):.2f}, max: {np.max(images_val):.2f}')
    print(f'images_val mean: {np.mean(images_val):.2f}, std: {np.std(images_val):.2f}')

In [None]:
# Plot some Validation Images (only when a validation split exists)
if not USE_ALL_TRAINING_DATA:
    show_batch(get_val_dataset(0, bs=32))

# Learning Rate Scheduler

Linear warmup with cosine decay, the usual. Keep in mind that each epoch consists of 10 passes over the training data rather than the conventional 1. This keeps the logs and training history graphs readable; otherwise there would be 200 epochs (20 × 10).

In [None]:
def lrfn(current_step, num_warmup_steps, lr_max, num_cycles=0.50, num_training_steps=EPOCHS):
    """Learning rate for `current_step`: linear warmup followed by cosine decay.

    Args:
        current_step: 0-based step index.
        num_warmup_steps: Number of steps of linear warmup towards `lr_max`.
        lr_max: Peak learning rate.
        num_cycles: Number of cosine cycles over the decay phase (0.5 is half a cycle).
        num_training_steps: Total number of steps, warmup included.

    Returns:
        The learning rate as a float, never below 0.
    """
    # Warmup phase
    if current_step < num_warmup_steps:
        warmup_fraction = float(current_step + 1) / float(max(1, num_warmup_steps + 1))
        return warmup_fraction * lr_max

    # Decay phase: fraction of the post-warmup steps completed so far
    decay_span = float(max(1, num_training_steps - num_warmup_steps))
    progress = float(current_step - num_warmup_steps) / decay_span
    cosine_factor = 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress))

    return max(0.0, cosine_factor) * lr_max

In [None]:
def plot_lr_schedule(lr_schedule):
    """Plot a per-epoch learning rate schedule and annotate every point with its value.

    Args:
        lr_schedule: Non-empty list with one learning rate per epoch.

    Raises:
        ValueError: If `lr_schedule` is empty.
    """
    n_epochs = len(lr_schedule)
    if n_epochs == 0:
        raise ValueError('lr_schedule must contain at least one learning rate')
    lr_values = list(lr_schedule)
    lr_high, lr_low = max(lr_values), min(lr_values)

    plt.figure(figsize=(20, 10))
    # Pad with None on both sides so epoch 1 sits at x=1 with a free margin left and right
    plt.plot([None] + lr_values + [None])
    # X labels follow the schedule length rather than the global EPOCHS constant
    tick_positions = np.arange(n_epochs + 2)
    tick_labels = [None] + list(map(str, np.arange(1, n_epochs + 1))) + [None]
    plt.xlim([0, n_epochs + 1])
    plt.xticks(tick_positions, tick_labels) # set tick step to 1 and let x axis start at 1

    # Increase y-limit for better readability
    plt.ylim([0, lr_high * 1.1])

    # Title
    schedule_info = f'start: {lr_values[0]:.1E}, max: {lr_high:.1E}, final: {lr_values[-1]:.1E}'
    plt.title(f'Step Learning Rate Schedule, {schedule_info}', size=18, pad=12)

    # Plot Learning Rates
    offset_y = (lr_high - lr_low) * 0.02
    for idx, val in enumerate(lr_values):
        # First point: always 'right'. Checking it first stops index -1 from wrapping
        # around to the last element. Last point: always 'left'.
        if idx == 0:
            ha = 'right'
        elif idx == n_epochs - 1:
            ha = 'left'
        elif lr_values[idx - 1] < val:
            ha = 'right'
        else:
            ha = 'left'
        plt.plot(idx + 1, val, 'o', color='black')
        plt.annotate(f'{val:.1E}', xy=(idx + 1, val + offset_y), size=12, ha=ha)

    plt.xlabel('Epoch', size=16, labelpad=5)
    plt.ylabel('Learning Rate', size=16, labelpad=5)
    plt.grid()
    plt.show()

# One learning rate per epoch: no warmup, half a cosine cycle starting at LR_MAX
LR_SCHEDULE = [lrfn(step, num_warmup_steps=0, lr_max=LR_MAX, num_cycles=0.50) for step in range(EPOCHS)]
plot_lr_schedule(LR_SCHEDULE)

# Callbacks

In [None]:
# Learning Rate Callback
# The scheduler passes the epoch index, which is used to look up that epoch's rate in LR_SCHEDULE
lr_callback = tf.keras.callbacks.LearningRateScheduler(lambda step: LR_SCHEDULE[step], verbose=1)

# Training

In [None]:
# Get the Training and Validation dataset
# With USE_ALL_TRAINING_DATA set, get_val_dataset returns (None, 0): there is no validation split
train_dataset, train_steps_per_epoch = get_train_dataset(0, return_steps=True)
val_dataset, val_steps_per_epoch = get_val_dataset(0, return_steps=True)

print(f'Train Steps per Epoch: {train_steps_per_epoch}')
print(f'Val Steps per Epoch: {val_steps_per_epoch}')

In [None]:
# Fit the model
# The learning rate of every epoch is set by lr_callback from LR_SCHEDULE
history = model.fit(
    train_dataset,
    # Due to the low number of samples, one epoch covers 10 passes over the training data
    steps_per_epoch = train_steps_per_epoch * 10,
    validation_data = val_dataset,
    validation_steps = val_steps_per_epoch,
    epochs = EPOCHS,
    verbose = 1,
    callbacks = [
        lr_callback,
    ],
)

In [None]:
# Save the trained model weights to model.h5
model.save_weights('model.h5')

# Training Visualisation

Let's check what the model learned during training. Each row consists of the input image, the actual mask, the predicted mask and the mask where each pixel is thresholded at 0.50 to form a binary map.

In [None]:
def plot_results(dataset, nrows, ncols=4):
    """Plot image, true mask, predicted mask and thresholded prediction for one batch.

    Uses the global `model` for prediction. At most `nrows` samples of the first batch
    are shown, one per row. Four columns are drawn, so `ncols` must be at least 4.

    Args:
        dataset: Iterable yielding `(images, labels)` batches; images are expected to be
            normalised as ((x - 128) / 128) / 0.589.
        nrows: Number of rows (samples) in the figure.
        ncols: Number of columns in the figure.
    """
    def de_pad_batch(batch):
        # Crop the symmetric padding to get back to HEIGHT x WIDTH
        pad_h = (HEIGHT_TARGET - HEIGHT) // 2
        pad_w = (WIDTH_TARGET - WIDTH) // 2

        return batch[:, pad_h:pad_h+HEIGHT, pad_w:pad_w+WIDTH]

    images, labels = next(iter(dataset))

    # Predict Masks
    labels_pred = model(images, training=False)

    # Remove Padding
    labels = de_pad_batch(labels)
    # De-normalise with 0.589, the factor the dataset pipelines normalise with (this was
    # 0.108 before); round and clip so float error cannot shift or wrap the uint8 values
    images = np.clip(np.rint((images.numpy() * 0.589) * 128 + 128), 0, 255).astype(np.uint8)
    images = de_pad_batch(images)
    labels_pred = de_pad_batch(labels_pred)

    # squeeze=False keeps `axes` 2-D even when nrows is 1
    fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(ncols*8, nrows*6), squeeze=False)

    # Never draw more samples than there are rows
    samples = zip(images[:nrows], labels[:nrows], labels_pred[:nrows])
    for r, (img, lbl, lbl_pred) in enumerate(samples):
        # Plot Image
        axes[r, 0].imshow(img)
        axes[r, 0].set_title('Image', size=18)
        axes[r, 0].axis(False)

        # Mask
        axes[r, 1].imshow(lbl)
        axes[r, 1].set_title('Mask', size=18)
        axes[r, 1].axis(False)

        # Predicted Mask (raw sigmoid output)
        axes[r, 2].imshow(lbl_pred)
        axes[r, 2].set_title('Mask Predicted', size=18)
        axes[r, 2].axis(False)

        # Predicted Mask with Threshold
        lbl_pred_th50 = tf.cast(lbl_pred > 0.50, tf.uint8)
        axes[r, 3].imshow(lbl_pred_th50)
        axes[r, 3].set_title('Mask Predicted Threshold 0.50', size=18)
        axes[r, 3].axis(False)

In [None]:
# Training Visualisation, especially Astro cells have many errors
# One batch of 16 training samples, one sample per figure row
plot_results(get_train_dataset(0, bs=16), 16)

In [None]:
# Validation Visualisation (only when a validation split exists)
if not USE_ALL_TRAINING_DATA:
    plot_results(get_val_dataset(0, bs=8), 8)

# Training History

In [None]:
def plot_history_metric(metric, f_best=np.argmax):
    """Plot the training history of `metric`, plus validation values when recorded.

    Reads the global `history` returned by `model.fit`.

    Args:
        metric: Key in `history.history`, e.g. 'loss' or 'auc'.
        f_best: Function returning the index of the best value (np.argmax for
            scores, np.argmin for losses).
    """
    values = history.history[metric]
    n_epochs = len(values)
    plt.figure(figsize=(15, 8))
    # Epoch Ticks
    if n_epochs <= 20:
        x = np.arange(1, n_epochs + 1)
    else:
        x = [1, 5] + [10 + 5 * idx for idx in range((n_epochs - 10) // 5 + 1)]
    x_ticks = np.arange(1, n_epochs + 1)

    # Validation: look for this metric's own key instead of a substring match across
    # all keys, which could match unrelated names and then raise a KeyError
    val_key = f'val_{metric}'
    if val_key in history.history:
        val_values = history.history[val_key]
        val_best_idx = f_best(val_values)
        plt.scatter(val_best_idx + 1, val_values[val_best_idx], color='purple', s=75, marker='o', label='val_best')
        plt.plot(x_ticks, val_values, label='val')

    # Training curve with its best epoch highlighted
    plt.plot(x_ticks, values, label='train')
    best_idx = f_best(values)
    plt.scatter(best_idx + 1, values[best_idx], color='red', s=75, marker='o', label='train_best')

    plt.title(f'Model {metric}', fontsize=24, pad=10)
    plt.ylabel(metric, fontsize=20, labelpad=10)
    plt.xlabel('epoch', fontsize=20, labelpad=10)
    plt.tick_params(axis='x', labelsize=8)
    plt.xticks(x, fontsize=16) # set tick step to 1 and let x axis start at 1
    plt.yticks(fontsize=16)
    plt.legend(prop={'size': 18})
    plt.grid()

In [None]:
# Loss history; lower is better, so the best epoch is picked with argmin
plot_history_metric('loss', f_best=np.argmin)

In [None]:
# Binary accuracy history; the best epoch is picked with the default argmax
plot_history_metric('binary_accuracy')

In [None]:
# AUC history; the best epoch is picked with the default argmax
plot_history_metric('auc')

In [None]:
# (True Positives) / (True Positives + False Positives)
# The percentage of predicted cell pixels that are correct
# ~80% of pixels predicted to contain a cell are correct!
plot_history_metric('precision')

In [None]:
# (True Positives) / (True Positives + False Negatives)
# The percentage of pixels containing cells that are found
# ~70% of all pixels containing a cell are found!
plot_history_metric('recall')

# Area Under the Receiver Operating Characteristic Curve

The Receiver Operating Characteristic (ROC) curve shows the trade-off between the true positive rate and the false positive rate as the threshold varies; the area under it (AUC) summarises that trade-off in a single number. This is valuable when selecting a threshold for converting prediction values in the range \[0,1\] to a binary mask, as done with a threshold of 0.50 in the visualisations above.

In [None]:
def get_y_and_y_pred():
    """Collect the flattened true labels and model predictions for every pixel.

    Iterates once over the training data when USE_ALL_TRAINING_DATA is set, otherwise
    over the validation data. Uses the global `model` for prediction.

    Returns:
        Tuple `(y, y_pred)` of 1-D arrays: uint8 labels and float32 predictions. Both
        are empty when the dataset yields no batches.
    """
    if USE_ALL_TRAINING_DATA:
        dataset, total = get_train_dataset(0, return_steps=True, repeat=False)
    else:
        # return_steps=True is required here: without it a bare dataset is returned,
        # which cannot be unpacked into (dataset, total)
        dataset, total = get_val_dataset(0, return_steps=True)

    # Collect per-batch arrays and concatenate once at the end; concatenating inside
    # the loop re-copies everything gathered so far on every step
    y_batches, y_pred_batches = [], []
    for images, label in tqdm(dataset, total=total):
        label_pred = model(images, training=False)
        y_batches.append(label.numpy().flatten().astype(np.uint8))
        y_pred_batches.append(label_pred.numpy().flatten().astype(np.float32))

    if not y_batches:
        return np.empty(0, dtype=np.uint8), np.empty(0, dtype=np.float32)

    return np.concatenate(y_batches, axis=0), np.concatenate(y_pred_batches, axis=0)
        
# Flattened per-pixel labels and predictions used by the ROC and precision/recall cells below
y, y_pred = get_y_and_y_pred()

# Show the amount of pixels predicted, 232 million!
print(f'y shape: {y.shape}, y_pred shape: {y_pred.shape}')

In [None]:
# Compute the Receiver Operating Characteristic curve, takes several minutes...
# The returned thresholds are not needed for the plot and are discarded
false_positive_rate, true_positive_rate, _ = metrics.roc_curve(y, y_pred)

In [None]:
# ROC curve: true positive rate against false positive rate, with the diagonal
# as the random-guessing baseline
plt.figure(figsize=(15,8))
plt.plot(false_positive_rate, true_positive_rate, color='darkorange', label='ROC')
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--', label='Random Guessing')
plt.title('Area Under the Receiver Operating Characteristic Curve', size=24)
plt.ylabel('True Positive Rate', size=18, labelpad=10)
plt.xlabel('False Positive Rate', size=18, labelpad=10)
plt.xticks(size=16)
plt.yticks(size=16)
plt.grid()
plt.legend(prop={'size': 16})
plt.show()

In [None]:
# Remove false_positive_rate and true_positive_rate to reduce memory usage
# before the precision/recall curve is computed
del false_positive_rate, true_positive_rate
gc.collect()

# Precision/Recall/Threshold Curve

In [None]:
# Compute Precision/Recall Curve, takes several minutes...
precision, recall, thresholds = metrics.precision_recall_curve(y, y_pred)
# precision and recall hold one more element than thresholds: their final point
# (precision=1, recall=0) has no threshold of its own. Pad at the END so that
# thresholds[i] stays aligned with precision[i] and recall[i]; prepending a value
# shifts every pair by one position.
thresholds = np.concatenate((thresholds, [1.0]))

Precision/Recall curve shows the precision (fraction of pixels predicted as cell pixels that actually are cell pixels) at a given recall (fraction of cell pixels predicted as cell pixels).

In [None]:
# Precision/Recall curve with precision on the x-axis and recall on the y-axis
plt.figure(figsize=(15,8))
plt.plot(precision, recall, color='darkorange', label='Precision/Recall')
plt.title('Precision/Recall Curve', size=24)
plt.xlabel('Precision', size=18, labelpad=10)
plt.ylabel('Recall', size=18, labelpad=10)
plt.xticks(size=16)
plt.yticks(size=16)
plt.grid()
plt.legend(prop={'size': 16})
plt.show()

The Threshold/Recall curve shows the recall value at a given threshold, which makes it possible to pick a threshold for a desired recall value.

In [None]:
# Threshold/Recall curve: threshold on the x-axis and recall on the y-axis, matching the
# axis labels below (the data arguments used to be passed in the opposite order)
plt.figure(figsize=(15,8))
plt.plot(thresholds, recall,  color='darkorange', label='Recall/Threshold')
plt.title('Threshold/Recall Curve', size=24)
plt.xlabel('Threshold', size=18, labelpad=10)
plt.ylabel('Recall', size=18, labelpad=10)
plt.xticks(size=16)
plt.yticks(size=16)
plt.grid()
plt.legend(prop={'size': 16})
plt.show()

The Threshold/Precision curve shows the precision value at a given threshold, which makes it possible to pick a threshold for a desired precision value.

In [None]:
# Threshold/Precision curve with the threshold on the x-axis and precision on the y-axis
plt.figure(figsize=(15,8))
plt.plot(thresholds, precision,  color='darkorange', label='Precision/Threshold')
plt.title('Threshold/Precision Curve', size=24)
plt.xlabel('Threshold', size=18, labelpad=10)
plt.ylabel('Precision', size=18, labelpad=10)
plt.xticks(size=16)
plt.yticks(size=16)
plt.grid()
plt.legend(prop={'size': 16})
plt.show()