<h3>This notebook contains the training code for a neural network that performs "Region of Interest" (ROI) detection. Its output is used at later stages to preprocess the images fed to the actual classifier: the ROI network predicts a bounding box around each lesion, and a separate algorithm then crops the images so that they match the aspect ratio required by the InceptionResNetV2 network.</h3>

In [1]:
import os
import absl.logging
import PIL.Image

import numpy as np
import pandas as pd
import tensorflow as tf

from typing import Callable
from tensorflow import keras

# Restrict absl logging to ERROR and above (presumably to quiet the warnings
# emitted while models are saved during training -- TODO confirm).
absl.logging.set_verbosity(absl.logging.ERROR)

In [2]:
def get_names(root_path: str) -> list[str]:
    """Return the name (text before the first '.') of every file under `root_path`.

    The directory tree is walked recursively with `os.walk`, so the order of
    the result follows the walk order and is not sorted.
    """
    return [
        file_name.partition('.')[0]
        for _, _, file_names in os.walk(root_path)
        for file_name in file_names
    ]


def get_paths(path: str) -> list[str]:
    """Return the path of every file found recursively under `path`.

    Each entry is the walked directory joined with the file name; the order
    follows `os.walk` and is not sorted.
    """
    return [
        os.path.join(root, file)
        for root, _, files in os.walk(path)
        for file in files
    ]


# Dataset location, relative to the notebook's working directory.
base_dir = os.path.join('..', 'data', 'images_original_inception_resnet_v2_200x150_splitted')
train_dir = os.path.join(base_dir, 'training')
valid_dir = os.path.join(base_dir, 'validation')

In [3]:
def get_images_array(paths: list[str]) -> np.ndarray:
    """Load the images at `paths` into one float32 array scaled to [0, 1].

    Parameters:
    paths -- image file paths; all images must have the same size so that
             they can be stacked into a single array

    Returns:
    array of shape (len(paths), height, width, 3) with dtype float32;
    an empty array of shape (0,) when `paths` is empty
    """
    # Same float32 multiplication that keras.layers.Rescaling(1./255) performs,
    # but done in NumPy: no per-image layer call and no list of eager tensors
    # to convert at the end.
    scale = np.float32(1.0 / 255)
    rows = []

    for path in paths:
        with PIL.Image.open(path) as image:
            # The model input has 3 channels; convert() is a plain copy for
            # images that are already RGB.
            pixels = np.asarray(image.convert('RGB'), dtype=np.float32)
        rows.append(pixels * scale)

    return np.array(rows, dtype=np.float32)


# The bounding-box labels are built from the metadata sorted by `image_id`,
# whereas os.walk yields files in an unspecified, per-directory order.
# Sort the paths by the same key (file name up to the first '.') so that
# row i of X_* and row i of ys_* describe the same image.
train_paths = sorted(get_paths(train_dir), key=lambda path: os.path.basename(path).split('.')[0])
valid_paths = sorted(get_paths(valid_dir), key=lambda path: os.path.basename(path).split('.')[0])
X_train = get_images_array(train_paths)
X_valid = get_images_array(valid_paths)

In [4]:
# Network input size in pixels: 600x450 (presumably the original image
# resolution -- TODO confirm) reduced by an integer factor of 3.
SMALLER_WIDTH, SMALLER_HEIGHT = (600 // 3, 450 // 3)

In [5]:
def _normalized_boxes(boxes_df: pd.DataFrame) -> np.ndarray:
    """Convert [left, top, right, bottom] pixel boxes to fractions of the image size.

    Horizontal coordinates (left, right) are divided by the image width and
    vertical coordinates (top, bottom) by the image height.
    NOTE(review): assumes the metadata boxes are expressed in the
    SMALLER_WIDTH x SMALLER_HEIGHT pixel space -- confirm against the CSV.
    """
    boxes = boxes_df.to_numpy().astype(float)
    boxes[:, [0, 2]] /= SMALLER_WIDTH   # left, right  -> x axis
    boxes[:, [1, 3]] /= SMALLER_HEIGHT  # top, bottom  -> y axis

    return boxes


train_names = set(get_names(train_dir))
valid_names = set(get_names(valid_dir))
metadata_path = os.path.join('..', 'data', 'HAM10000_metadata_ext.csv')
# Sorted once by image_id; the boolean filters below keep that order.
data = pd.read_csv(metadata_path).sort_values(by='image_id')
relevant_cols = ['left', 'top', 'right', 'bottom']
train_df = data[data['image_id'].isin(train_names)][relevant_cols]
valid_df = data[data['image_id'].isin(valid_names)][relevant_cols]
ys_train = _normalized_boxes(train_df)
ys_valid = _normalized_boxes(valid_df)

# model.fit would reject mismatched sample counts anyway; fail here with a clearer message.
assert len(ys_train) == len(X_train), 'training images and labels differ in count'
assert len(ys_valid) == len(X_valid), 'validation images and labels differ in count'

Using a simple IoU loss made the model get stuck: it was getting neither better nor worse. With the CIoU loss the model starts from a worse point, but it does learn. Let's see how it does after 50 epochs.

(Losses implemented after: https://medium.com/analytics-vidhya/different-iou-losses-for-faster-and-accurate-object-detection-3345781e0bf)

In [6]:
def iou_metric(y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
    """
    Smoothed "soft" IoU computed over the flattened tensors.

    Both inputs are flattened and treated as per-element scores: the
    intersection is the sum of element-wise products and the union is the sum
    of both inputs minus that intersection. Note that when the inputs hold
    box coordinates this is not the geometric overlap of the boxes.

    Parameters:
    y_true -- ground truth values, any shape
    y_pred -- predicted values, same number of elements as y_true

    Returns:
    iou_score -- scalar score, tensor of shape ()
    """
    flat_true = tf.reshape(y_true, [-1])
    flat_pred = tf.reshape(y_pred, [-1])

    intersection = tf.reduce_sum(flat_true * flat_pred)
    union = tf.reduce_sum(flat_true) + tf.reduce_sum(flat_pred) - intersection

    # The +1.0 on both sides keeps the ratio defined when the union is zero.
    return (intersection + 1.0) / (union + 1.0)


def iou_loss(y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
    """
    Loss counterpart of `iou_metric`: 1 - iou_metric(y_true, y_pred).

    Returns:
    iou_loss -- scalar loss, tensor of shape ()
    """
    return 1 - iou_metric(y_true, y_pred)

In [8]:
def ciou_metric(y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
    """
    Score counterpart of `ciou_loss`: 1 - ciou_loss(y_true, y_pred).

    The per-box term is (1 - IoU) plus the squared distance between the box
    centers divided by the squared diagonal of the enclosing box; no
    aspect-ratio term is included.

    Parameters:
    y_true -- ground truth bounding box, tensor of shape (?, 4), [xmin, ymin, xmax, ymax]
    y_pred -- predicted bounding box, tensor of shape (?, 4)

    Returns:
    ciou_score -- scalar score, tensor of shape (); higher is better
    """
    # Delegate to the loss so the two can never drift apart.
    return 1.0 - ciou_loss(y_true, y_pred)


def ciou_loss(y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
    """
    IoU loss with a center-distance penalty for axis-aligned boxes.

    Parameters:
    y_true -- ground truth bounding box, tensor of shape (?, 4), [xmin, ymin, xmax, ymax]
    y_pred -- predicted bounding box, tensor of shape (?, 4)

    Returns:
    ciou_loss -- scalar loss, tensor of shape (): the mean over all boxes of
                 (1 - IoU) + center_distance^2 / enclosing_diagonal^2
    """
    eps = 1e-9  # keeps both divisions defined for degenerate boxes

    # One (?, 1) column per box edge
    gt_x1, gt_y1, gt_x2, gt_y2 = tf.split(tf.reshape(y_true, [-1, 4]), 4, axis=-1)
    pr_x1, pr_y1, pr_x2, pr_y2 = tf.split(tf.reshape(y_pred, [-1, 4]), 4, axis=-1)

    # Overlap rectangle, clamped to zero when the boxes do not intersect
    overlap_w = tf.maximum(0.0, tf.minimum(gt_x2, pr_x2) - tf.maximum(gt_x1, pr_x1))
    overlap_h = tf.maximum(0.0, tf.minimum(gt_y2, pr_y2) - tf.maximum(gt_y1, pr_y1))
    overlap_area = overlap_w * overlap_h

    gt_area = (gt_x2 - gt_x1) * (gt_y2 - gt_y1)
    pr_area = (pr_x2 - pr_x1) * (pr_y2 - pr_y1)
    union_area = gt_area + pr_area - overlap_area

    iou = overlap_area / (union_area + eps)

    # Squared distance between the two box centers
    offset_x = (gt_x1 + gt_x2) / 2 - (pr_x1 + pr_x2) / 2
    offset_y = (gt_y1 + gt_y2) / 2 - (pr_y1 + pr_y2) / 2
    center_dist_sq = tf.square(offset_x) + tf.square(offset_y)

    # Squared diagonal of the smallest box enclosing both boxes
    hull_w = tf.maximum(gt_x2, pr_x2) - tf.minimum(gt_x1, pr_x1)
    hull_h = tf.maximum(gt_y2, pr_y2) - tf.minimum(gt_y1, pr_y1)
    hull_diag_sq = tf.square(hull_w) + tf.square(hull_h)

    per_box_loss = (1 - iou) + center_dist_sq / (hull_diag_sq + eps)

    return tf.reduce_mean(per_box_loss)

In [9]:
def get_model(loss: Callable, metric: Callable) -> keras.Model:
    """
    Build and compile the bounding-box regression CNN.

    The network is three convolutional modules (7x7, 5x5 and 3x3 kernels)
    followed by dense layers of 256 and 128 units and a 4-unit sigmoid output,
    so every predicted coordinate lies in (0, 1).

    Parameters:
    loss -- loss function passed to `model.compile`
    metric -- metric function passed to `model.compile`

    Returns:
    the compiled model; its summary is printed as a side effect
    """
    def get_conv_module(prev: keras.layers.Layer, filters: int, kernel_size: int) -> keras.layers.Layer:
        """Two same-padded ReLU convolutions (`filters`, then 2 * `filters`) and a max pooling."""
        x = keras.layers.Conv2D(filters, kernel_size, padding='same', activation='relu')(prev)
        x = keras.layers.Conv2D(filters * 2, kernel_size, padding='same', activation='relu')(x)
        x = keras.layers.MaxPooling2D()(x)

        return x


    _input = keras.layers.Input(shape=(SMALLER_HEIGHT, SMALLER_WIDTH, 3))
    conv_module = get_conv_module(_input, 16, 7)
    conv_module = get_conv_module(conv_module, 32, 5)
    conv_module = get_conv_module(conv_module, 64, 3)
    conv_module = keras.layers.Flatten()(conv_module)
    locator_module = keras.layers.Dense(256, activation='relu')(conv_module)
    locator_module = keras.layers.Dense(128, activation='relu')(locator_module)
    locator_module = keras.layers.Dense(4, activation='sigmoid')(locator_module)

    model = keras.Model(_input, locator_module)

    model.compile(optimizer='adam', loss=loss, metrics=[metric])

    # summary() prints on its own and returns None; wrapping it in print()
    # only appended a stray "None" to the output.
    model.summary()

    return model

In [10]:
def run_model(model_factory, model_name: str, loss: Callable, metric: Callable,
              epochs: int = 50, batch_size: int = 64, early_stopping: bool = False):
    """
    Build a model with `model_factory` and train it on the notebook's data.

    Training uses the module-level arrays X_train / ys_train and validates on
    X_valid / ys_valid. Best-only checkpoints are written under
    'models/<model_name>_<epoch>' and TensorBoard logs under
    'tensor_logs/<model_name>'.

    Parameters:
    model_factory -- callable taking (loss, metric) and returning a compiled model
    model_name -- name used in the checkpoint and TensorBoard paths
    loss -- loss function forwarded to `model_factory`
    metric -- metric function forwarded to `model_factory`
    epochs -- maximum number of training epochs
    batch_size -- number of samples per gradient update
    early_stopping -- when True, stop once the validation metric has not
                      improved for 10 epochs; off by default, which matches
                      the previous behaviour (the callback was created but
                      never passed to `fit`)

    Returns:
    the History object returned by `model.fit`
    """
    callbacks = [
        keras.callbacks.ModelCheckpoint(
            filepath='models/' + model_name + '_{epoch}',
            save_best_only=True),
        keras.callbacks.TensorBoard(log_dir=f'tensor_logs/{model_name}'),
    ]

    if early_stopping:
        # Keras logs a function metric under the function's name, prefixed
        # with 'val_' for validation data. The metric is assumed to be
        # "higher is better"; without a usable name fall back to the loss.
        metric_name = getattr(metric, '__name__', None)
        if metric_name is not None:
            monitor, mode = f'val_{metric_name}', 'max'
        else:
            monitor, mode = 'val_loss', 'min'

        callbacks.append(keras.callbacks.EarlyStopping(
            monitor=monitor, mode=mode, patience=10, min_delta=1e-6))

    model = model_factory(loss, metric)

    return model.fit(
        X_train,
        ys_train,
        validation_data=(X_valid, ys_valid),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=callbacks)

In [11]:
# The run keeps the 'roi_detection_iou' name for its checkpoint and log paths
# even though it trains with the CIoU loss and metric.
history = run_model(get_model, 'roi_detection_iou', ciou_loss, ciou_metric)

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 150, 200, 3)]     0         
                                                                 
 conv2d (Conv2D)             (None, 150, 200, 16)      2368      
                                                                 
 conv2d_1 (Conv2D)           (None, 150, 200, 32)      25120     
                                                                 
 max_pooling2d (MaxPooling2D  (None, 75, 100, 32)      0         
 )                                                               
                                                                 
 conv2d_2 (Conv2D)           (None, 75, 100, 32)       25632     
                                                                 
 conv2d_3 (Conv2D)           (None, 75, 100, 64)       51264     
                                                             

INFO:tensorflow:Assets written to: models\roi_detection_iou_1\assets


Epoch 2/50


INFO:tensorflow:Assets written to: models\roi_detection_iou_2\assets


Epoch 3/50


INFO:tensorflow:Assets written to: models\roi_detection_iou_3\assets


Epoch 4/50


INFO:tensorflow:Assets written to: models\roi_detection_iou_4\assets


Epoch 5/50


INFO:tensorflow:Assets written to: models\roi_detection_iou_5\assets


Epoch 6/50


INFO:tensorflow:Assets written to: models\roi_detection_iou_6\assets


Epoch 7/50


INFO:tensorflow:Assets written to: models\roi_detection_iou_7\assets


Epoch 8/50


INFO:tensorflow:Assets written to: models\roi_detection_iou_8\assets


Epoch 9/50


INFO:tensorflow:Assets written to: models\roi_detection_iou_9\assets


Epoch 10/50
Epoch 11/50
Epoch 12/50


INFO:tensorflow:Assets written to: models\roi_detection_iou_12\assets


Epoch 13/50


INFO:tensorflow:Assets written to: models\roi_detection_iou_13\assets


Epoch 14/50
Epoch 15/50


INFO:tensorflow:Assets written to: models\roi_detection_iou_15\assets


Epoch 16/50
Epoch 17/50
Epoch 18/50


INFO:tensorflow:Assets written to: models\roi_detection_iou_18\assets


Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50


INFO:tensorflow:Assets written to: models\roi_detection_iou_27\assets


Epoch 28/50
Epoch 29/50
Epoch 30/50


INFO:tensorflow:Assets written to: models\roi_detection_iou_30\assets


Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50


INFO:tensorflow:Assets written to: models\roi_detection_iou_49\assets


Epoch 50/50
