# The role of segmentation in skin lesion classification using an ISIC dataset
*Lilian MALLARDEAU*

In [1]:
import numpy as np
import pandas as pd
import cv2

import skimage.morphology
import skimage.segmentation

import matplotlib
import matplotlib.pyplot as plt
#matplotlib.rcParams['figure.dpi'] = 150

import keras
from tensorflow.keras.applications import EfficientNetB0 as EfficientNet

from utils import *
from notifier import *

In [2]:
# Parameters

# Training settings
input_shape = (224, 224, 3)  # handed to EfficientNet as `input_shape` and used as the resize target
batch_size = 256
epochs = 300
dataset_size = 20  # number of rows sampled from the metadata table

# Dataset locations (relative paths)
images_folder = "dataset/train_jpeg/"
csv_file = "dataset/ISIC_2020_Training_GroundTruth_v2.csv"
duplicates_csv_file = "dataset/ISIC_2020_Training_Duplicates.csv"

# Notifier used to send progress messages while the notebook runs
notifier = TelegramNotifier()

In [3]:
# Ground-truth table (one row per image) and the names listed in the
# `image_name_2` column of the duplicates file.
metadata = pd.read_csv(csv_file)
duplicates = list(pd.read_csv(duplicates_csv_file)['image_name_2'])

# Removing duplicates
# `isin` performs one vectorised membership test instead of running a Python
# lambda that scans the whole `duplicates` list for every metadata row.
metadata = metadata[~metadata['image_name'].isin(duplicates)].reset_index(drop=True)

In [4]:
def load_image(filename):
    """Read an image file with OpenCV and return it in RGB channel order.

    Args:
        filename: Path of the image file to read.

    Returns:
        The decoded image converted from OpenCV's BGR order to RGB.

    Raises:
        OSError: If OpenCV could not read the file (``cv2.imread`` returned ``None``).
    """
    img = cv2.imread(filename)
    # cv2.imread signals failure by returning None instead of raising; without this
    # check the failure only shows up as an opaque assertion inside cvtColor.
    if img is None:
        raise OSError("Unable to read image file: {}".format(filename))
    return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

def load_images(filenames):
    """Load every file in `filenames` with `load_image` and return the images as a list."""
    return list(map(load_image, filenames))

def resize_image(image):
    """Resize `image` to the spatial size given by the global `input_shape`.

    Args:
        image: Image array accepted by ``cv2.resize``.

    Returns:
        The resized image, with `input_shape[0]` rows and `input_shape[1]` columns.
    """
    target_height, target_width = input_shape[:2]
    # cv2.resize expects dsize as (width, height), whereas input_shape starts with
    # (height, width); swap them so non-square shapes are resized correctly.
    return cv2.resize(image, (target_width, target_height))

In [None]:
# Rows of the metadata table split by diagnosis
benign = metadata[metadata['benign_malignant'] == 'benign']
malignant = metadata[metadata['benign_malignant'] == 'malignant']

# Random subset of `dataset_size` rows used for this run
sample = metadata.sample(dataset_size)

notifier.send_message("Loading images...")
# Build the file paths from the `images_folder` parameter rather than repeating
# the folder name here, so changing the parameter is enough to move the dataset.
train_images = load_images(images_folder + sample['image_name'] + ".jpg")
train_labels = sample['target']

notifier.send_message("Resizing images...")
# One pre-allocated array holding every sampled image at the network input size
train_images_resized = np.empty((dataset_size, *input_shape))
for i, img in enumerate(train_images):
    train_images_resized[i] = resize_image(img)

# Artifacts removal

In [None]:
# Show the first resized image next to its morphologically closed version
preview_image = train_images_resized[0]
closing_panels = [
    (preview_image, "Original image"),
    (apply_morpho_closing(preview_image), "Image with morphological closing applied"),
]
pictures_grid(closing_panels, layout=(1, 2))

In [None]:
# Apply morphological closing to every loaded image
train_images = list(map(apply_morpho_closing, train_images))

# Unvignetting

In [None]:
def unvignette(img, sigma=150):
    """Divide the first three channels of `img` by a 2-D Gaussian mask.

    The mask is the outer product of two 1-D Gaussian kernels (one per image
    axis), scaled by 255 and divided by its own norm.

    Args:
        img: Image array with at least three channels on its last axis.
        sigma: Standard deviation passed to ``cv2.getGaussianKernel`` for both
            axes. Defaults to 150, the value previously hard-coded.

    Returns:
        A copy of `img` with the same dtype whose first three channels have
        been divided by the mask.
    """
    # shape[:2] is (rows, columns)
    rows, cols = img.shape[:2]
    kernel_rows = cv2.getGaussianKernel(rows, sigma)
    kernel_cols = cv2.getGaussianKernel(cols, sigma)
    kernel = kernel_rows * kernel_cols.T  # (rows, cols) outer product
    mask = 255 * kernel / np.linalg.norm(kernel)

    corrected = img[:, :, :3].astype(np.float64) / mask[:, :, np.newaxis]
    if np.issubdtype(img.dtype, np.integer):
        # Where the mask is below 1 the quotient can exceed the dtype's range;
        # clip so that writing back into an integer image cannot wrap around.
        limits = np.iinfo(img.dtype)
        corrected = np.clip(corrected, limits.min, limits.max)

    new_img = img.copy()
    new_img[:, :, :3] = corrected
    return new_img

# Segmentation

## Using KMeans

In [None]:
# Preview of the KMeans segmentation on the first image, after morphological closing
# (second argument 6 — presumably the structuring-element size; confirm in utils)
img = apply_morpho_closing(train_images[0], 6)
kmeans_panels = [
    (img, "Original image"),
    (kmeans_mask(img, return_rgb=True), "Mask"),
    (kmeans_segmentation(img), "Segmented image"),
]
pictures_grid(kmeans_panels, layout=(1, 3))

In [None]:
# KMeans-segment every training image
train_images_segmented = list(map(kmeans_segmentation, train_images))

## Using the Chan-Vese algorithm

In [None]:
def chanvese_mask(img, extended_output=False):
    """Run scikit-image's Chan-Vese segmentation on the grayscale version of `img`.

    Args:
        img: RGB image accepted by ``cv2.cvtColor`` with ``COLOR_RGB2GRAY``.
        extended_output: Forwarded unchanged to ``skimage.segmentation.chan_vese``.

    Returns:
        Whatever ``skimage.segmentation.chan_vese`` returns for the given
        `extended_output` value.
    """
    img_gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    # The leading arguments are passed positionally (mu, lambda1, lambda2, tol,
    # maximum number of iterations): the iteration-count keyword was renamed from
    # `max_iter` to `max_num_iter` in newer scikit-image releases, and the
    # positional form works with both spellings.
    mask = skimage.segmentation.chan_vese(
        img_gray,
        .25,   # mu
        1,     # lambda1
        1,     # lambda2
        1e-3,  # tol
        200,   # maximum number of iterations
        dt=0.5,
        init_level_set="checkerboard",
        extended_output=extended_output,
    )
    return mask

def chanvese_segmentation(img):
    """Return a copy of `img` in which the pixels selected by `chanvese_mask` are set to 255."""
    selected_pixels = chanvese_mask(img)
    result = img.copy()
    # Paint the selected pixels white in the copy; the input image is left untouched
    result[selected_pixels] = 255
    return result

In [None]:
# Show the first training image next to its Chan-Vese segmentation
chanvese_panels = [
    (train_images[0], "Original image"),
    (chanvese_segmentation(train_images[0]), "Chan-Vese segmentation"),
]
pictures_grid(chanvese_panels, layout=(1, 2))

# Data augmentation
Only the malignant pictures are augmented, using rotations and flips.

In [None]:
def augment_image(image):
    """Return seven rotated and/or flipped variants of `image`.

    The list contains, in order: the three rotations of the image (90° clockwise,
    90° counter-clockwise, 180°), its vertical flip, its horizontal flip, and
    each of those two flips rotated 90° clockwise.
    """
    flipped_vertically = cv2.flip(image, 0)
    flipped_horizontally = cv2.flip(image, 1)
    return [
        cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE),
        cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE),
        cv2.rotate(image, cv2.ROTATE_180),
        flipped_vertically,
        flipped_horizontally,
        cv2.rotate(flipped_vertically, cv2.ROTATE_90_CLOCKWISE),
        cv2.rotate(flipped_horizontally, cv2.ROTATE_90_CLOCKWISE),
    ]

In [None]:
# Augment every malignant picture of the sample. The new images are appended to
# `train_images` and one label 1 is recorded per new image.
augmented_labels = []
for index, diagnosis in enumerate(sample['benign_malignant']):
    if diagnosis == 'malignant':
        augmented_images = augment_image(train_images[index])
        train_images.extend(augmented_images)
        augmented_labels.extend([1] * len(augmented_images))

# `Series.append` was removed in pandas 2.0; a single `pd.concat` after the loop
# also avoids copying the whole label Series once per malignant picture.
if augmented_labels:
    train_labels = pd.concat([train_labels, pd.Series(augmented_labels)])

# Classification

In [None]:
# EfficientNet backbone with ImageNet weights and without its classification head
efficientnet = EfficientNet(
    weights='imagenet',
    include_top=False,
    input_shape=input_shape,
    classes=2,
)

# Backbone followed by global average pooling, dropout and a single sigmoid unit
model = keras.models.Sequential([
    efficientnet,
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dropout(0.5),
    keras.layers.Dense(1, activation='sigmoid'),
])

model.summary()

In [None]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['binary_accuracy'])

# Early stopping to monitor the validation loss and avoid overfitting
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10, restore_best_weights=True)

# Reducing learning rate on plateau
rlrop = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', mode='min', patience=5, factor=0.5, min_lr=1e-6, verbose=1)

# Checkpoint callback
checkpoint = keras.callbacks.ModelCheckpoint(
    filepath="checkpoints/checkpoint.epoch{epoch:02d}-loss{val_loss:.2f}.hdf5",
    save_weights_only=False,
    monitor='val_binary_accuracy',
    mode='max',
    save_best_only=True,
)
callbacks = [Notify(epochs), early_stop, rlrop, checkpoint]

# Build the network input from `train_images`, the list that the artifact-removal
# and augmentation cells keep in step with `train_labels`. `train_images_resized`
# was filled before augmentation, so it has fewer rows than `train_labels` as soon
# as one malignant picture has been augmented, and Keras rejects inputs and
# targets of different lengths.
train_inputs = np.array([resize_image(img) for img in train_images], dtype=np.float32)

# Every callback above monitors a `val_*` metric (and the checkpoint file name
# formats `val_loss`), so part of the data must be held out for validation.
# `batch_size` comes from the parameters cell, where it was declared but never used.
history = model.fit(
    train_inputs,
    np.asarray(train_labels),
    epochs=epochs,
    batch_size=batch_size,
    validation_split=0.2,
    verbose=2,
    callbacks=callbacks,
    shuffle=True,
    class_weight={0: 1, 1: 10},
)

model.save_weights("model.h5")