In [86]:
from IPython.display import clear_output

import warnings
warnings.filterwarnings('ignore')

# Data Preparation

In [83]:
# Download the dataset first (requires the Kaggle CLI) and unpack it into ./dataset:
# !kaggle datasets download sleep3r/melanoma

In [87]:
import os
import pickle
from typing import Tuple

import cv2
import numpy as np
import pandas as pd

from tqdm import tqdm_notebook

IMG_WIDTH, IMG_HEIGHT = 48, 48
DATA_PATH = './dataset'

In [85]:
def read_image(path: str) -> np.ndarray:
    """Load an image from disk as an RGB array resized to the notebook input size.

    Args:
        path: Path of the image file to read.

    Returns:
        The image in RGB channel order, resized to ``IMG_WIDTH`` x ``IMG_HEIGHT``.

    Raises:
        ValueError: If OpenCV cannot read or decode the file.
    """
    img = cv2.imread(path)
    # cv2.imread reports a missing/corrupt file by returning None rather than
    # raising; fail here with the offending path instead of an opaque error
    # from cvtColor further down.
    if img is None:
        raise ValueError(f"Could not read image: {path}")

    # OpenCV decodes to BGR; the rest of the notebook works with RGB.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # cv2.resize takes the target size as (width, height).
    return cv2.resize(img, (IMG_WIDTH, IMG_HEIGHT))

def prepare_data(data_path: str = DATA_PATH) -> Tuple[np.ndarray, np.ndarray, np.array, np.array]:
    """Read the train and validation images from disk into NumPy arrays.

    Expects the layout ``<data_path>/{train_sep,valid}/{Melanoma,NotMelanoma}/``.
    Labels follow the folder order: 0 = Melanoma, 1 = NotMelanoma.

    Note: a later cell in this notebook defines another function that is also
    called ``prepare_data`` and shadows this one once it has been executed.

    Args:
        data_path: Root directory of the dataset.

    Returns:
        ``(X_train, X_test, y_train, y_test)`` where ``train_sep`` feeds the
        train arrays and ``valid`` feeds the test arrays.
    """
    splits = ("train_sep", "valid")
    images = {split: [] for split in splits}
    labels = {split: [] for split in splits}

    for dataset in splits:
        for label, label_name in enumerate(("Melanoma", "NotMelanoma")):
            print(dataset, '-', label_name)
            # Honour the data_path argument (the global DATA_PATH was used
            # here before, which silently ignored the caller's choice).
            class_dir = os.path.join(data_path, dataset, label_name)
            for file_name in tqdm_notebook(os.listdir(class_dir)):
                images[dataset].append(read_image(os.path.join(class_dir, file_name)))
                labels[dataset].append(label)
            # Drop the finished progress bar so the cell output stays short.
            clear_output(wait=True)

    X_train = np.array(images["train_sep"])
    X_test = np.array(images["valid"])

    y_train = np.array(labels["train_sep"])
    y_test = np.array(labels["valid"])

    return X_train, X_test, y_train, y_test

In [None]:
# Load every image under DATA_PATH into memory (labels: 0 = Melanoma, 1 = NotMelanoma).
X_train, X_test, y_train, y_test = prepare_data()

# Visualisations

In [88]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [89]:
def plot_samples(X: np.ndarray = X_train, y: np.array = y_train):
    """Show a 2x5 grid of random images: label 0 on the top row, label 1 below.

    The defaults are bound to ``X_train`` / ``y_train`` when this cell is
    executed, so re-run the cell (or pass the arrays explicitly) after
    reloading the data.

    Args:
        X: Image array indexed by sample along the first axis.
        y: 1-D array of integer labels aligned with ``X``.

    Raises:
        ValueError: If one of the two classes has no images in ``X``.
    """
    def random_image(images: np.ndarray) -> np.ndarray:
        # One uniformly chosen sample from the given stack of images.
        return images[np.random.randint(images.shape[0])]

    # Only the middle subplot of each row carries a title, which acts as a row caption.
    titles = [''] * 10
    titles[2] = "Melanoma"
    titles[-3] = "No melanoma"
    fig = make_subplots(rows=2, cols=5, start_cell="top-left", subplot_titles=titles)

    for row, label in ((1, 0), (2, 1)):
        class_images = X[y == label]
        if class_images.shape[0] == 0:
            raise ValueError(f"No images with label {label} to sample from")
        for col in range(1, 6):
            fig.add_trace(go.Image(z=random_image(class_images)), row=row, col=col)

    fig.update_layout(title_text="Skin examples")
    fig.show()

In [192]:
def plot_history(history: dict = None, history_path: str = None):
    """Plot train/validation loss and accuracy per epoch side by side.

    Args:
        history: Mapping with the keys ``loss``, ``val_loss``, ``accuracy`` and
            ``val_accuracy``, each a per-epoch sequence.
        history_path: Optional path to a pickled history dict. When given it
            takes precedence over ``history``.

    Raises:
        ValueError: If neither ``history`` nor ``history_path`` is supplied.
    """
    if history_path:
        # SECURITY: pickle.load can execute arbitrary code; only open history
        # files that this notebook produced itself.
        with open(history_path, "rb") as f:
            history = pickle.load(f)

    if history is None:
        raise ValueError("Provide either `history` or `history_path`")

    fig = make_subplots(rows=1, cols=2, start_cell="top-left", subplot_titles=("Loss", "Acc"))

    # Epochs are numbered from 1 on the x axis.
    epochs = list(range(1, len(history['loss']) + 1))

    fig.add_trace(go.Scatter(x=epochs, y=history['loss'], name="Train loss"), row=1, col=1)
    fig.add_trace(go.Scatter(x=epochs, y=history['val_loss'], name="Val loss"), row=1, col=1)

    fig.add_trace(go.Scatter(x=epochs, y=history['accuracy'], name="Train accuracy"), row=1, col=2)
    fig.add_trace(go.Scatter(x=epochs, y=history['val_accuracy'], name="Val accuracy"), row=1, col=2)

    fig.update_layout(title_text="Validation curves")
    fig.show()

In [181]:
# Preview five random training images per class (uses the X_train / y_train defaults).
plot_samples()

# Evaluation

In [155]:
import plotly.figure_factory as ff
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score, roc_curve

In [219]:
def evaluate(model, X_test, y_test):
    """Plot the confusion matrix and the ROC curve of a binary classifier.

    ``Model`` (imported in the "Keras nets" section) must be in scope when this
    is called.

    Args:
        model: Fitted classifier. Keras ``Model`` instances are scored via
            ``predict``; anything else must provide ``predict_classes``.
        X_test: Inputs, preprocessed the same way as the training inputs.
        y_test: 1-D array of integer labels (0 or 1) aligned with ``X_test``.
    """
    if not isinstance(model, Model):
        preds = model.predict_classes(X_test, verbose=1)
        # No per-class scores are available on this path, so the ROC below
        # degenerates to the single operating point of the hard labels.
        scores = preds
    else:
        outputs = model.predict(X_test, verbose=1)
        preds = np.argmax(outputs, axis=1)
        # Class-1 minus class-0 output is a valid ranking score whether the
        # network emits softmax probabilities or raw logits.
        scores = outputs[:, 1] - outputs[:, 0]

    # Rows are actual labels and columns are predicted labels, both ordered [0, 1].
    m = confusion_matrix(y_test, preds, labels=[0, 1])

    # Both axes share the label order of the matrix, so they need the same
    # tick labels (the y labels used to be reversed relative to the x labels).
    class_names = ['Positive', 'Negative']
    fig = ff.create_annotated_heatmap(m, x=class_names, y=class_names,
                                      colorscale=[[0, 'navy'], [1, 'plum']])
    fig.update_layout(
        title="Confusion matrix",
        xaxis_title="Predicted",
        yaxis_title="Actual",
    )
    fig.show()

    # Use continuous scores rather than thresholded predictions so the curve
    # has more than one operating point.
    fpr, tpr, threshold = roc_curve(y_test, scores)

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=[0, 1], y=[0, 1], line={"dash": 'dot'}, name='Random'))
    fig.add_trace(go.Scatter(x=fpr, y=tpr, name='Fitted model'))
    fig.update_layout(title=f"ROC-AUC: {round(roc_auc_score(y_test, scores), 3)}")
    fig.show()

# Keras nets

In [227]:
import tensorflow as tf
from keras.layers import Input, Lambda, Dense, Flatten, Conv2D, \
                         MaxPooling2D, Dropout, BatchNormalization, MaxPool2D, Activation, Add, ZeroPadding2D
from keras.regularizers import l2
from keras.models import Model, Sequential
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.utils.np_utils import to_categorical

### normal CNN

In [81]:
def create_model_cnn(input_shape: tuple, n_classes: int = 2, weights_path: str = None):
    """Build and compile the baseline CNN.

    Two 5x5 convolutions, two 2x2 max-poolings, then a 1024-unit dense layer
    with batch normalisation and a softmax classifier.

    Args:
        input_shape: Shape of one input image, passed to the first convolution.
        n_classes: Number of units in the softmax output layer.
        weights_path: Optional weights file to load before compiling.

    Returns:
        The compiled ``Sequential`` model.
    """
    stack = [
        Conv2D(filters=64, kernel_size=(5, 5), activation="relu", input_shape=input_shape),
        Conv2D(filters=128, kernel_size=(5, 5), activation="relu"),
        MaxPooling2D(pool_size=(2, 2)),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
        Dense(1024, activation="relu"),
        BatchNormalization(),
        Dense(n_classes, activation="softmax"),
    ]
    network = Sequential(stack)

    if weights_path:
        network.load_weights(weights_path)

    network.compile(
        loss="categorical_crossentropy",
        optimizer="adam",
        metrics=["accuracy"],
    )
    return network

In [82]:
# Rebuild the baseline CNN and restore the weights written by train_net(..., name="CNN").
model = create_model_cnn(input_shape=(IMG_WIDTH, IMG_HEIGHT, 3), weights_path="./models/CNN/model.h5")

In [196]:
plot_history(history_path="./history/CNN.history")
# The networks are trained on pixels scaled to [0, 1], so the raw 0-255 test
# images must be scaled the same way before they are scored.
evaluate(model, X_test / 255.0, y_test)



### deep CNN

In [200]:
def create_model_deep_cnn(input_shape: tuple, n_classes: int = 2, weights_path: str = None):
    """Build and compile the deeper CNN (five 3x3 convolutions, He-normal init).

    Args:
        input_shape: Shape of one input image, passed to the first convolution.
        n_classes: Number of units in the softmax output layer.
        weights_path: Optional weights file to load before compiling.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()

    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', kernel_initializer='he_normal', input_shape=input_shape))
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', kernel_initializer='he_normal'))
    model.add(MaxPool2D((2, 2)))

    model.add(Conv2D(64, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal'))
    model.add(Conv2D(64, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal'))
    model.add(MaxPool2D(pool_size=(2, 2)))

    model.add(Conv2D(128, (3, 3), activation='relu', padding='same', kernel_initializer='he_normal'))
    model.add(Dropout(0.2))

    model.add(Flatten())

    model.add(Dense(256, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dense(n_classes, activation='softmax'))

    if weights_path:
        model.load_weights(weights_path)

    # The last layer already applies softmax, so the loss must consume
    # probabilities. tf.losses.softmax_cross_entropy expects logits and would
    # apply softmax a second time; use the same setup as create_model_cnn.
    model.compile(optimizer="adam",
                  loss="categorical_crossentropy",
                  metrics=['accuracy'])

    return model

In [205]:
# Rebuild the deep CNN and restore the weights written by train_net(..., name="deepCNN").
model = create_model_deep_cnn(input_shape=(IMG_WIDTH, IMG_HEIGHT, 3), weights_path="./models/deepCNN/model.h5")

Instructions for updating:
Use tf.cast instead.


In [206]:
plot_history(history_path="./history/deepCNN.history")
# The networks are trained on pixels scaled to [0, 1], so the raw 0-255 test
# images must be scaled the same way before they are scored.
evaluate(model, X_test / 255.0, y_test)



### ResNet

In [207]:
def resnet_block(x, size, kernel_size=(5, 5)):
    """Residual block: two BatchNorm -> ReLU -> Conv stages added back onto the input.

    Args:
        x: Input tensor. It is summed with the block output, so its channel
            count is expected to equal ``size``.
        size: Number of filters of both convolutions.
        kernel_size: Kernel size of both convolutions.

    Returns:
        The tensor ``x + F(x)``.
    """
    branch = x
    for _ in range(2):
        branch = BatchNormalization(momentum=0.9)(branch)
        branch = Activation('relu')(branch)
        branch = Conv2D(size, kernel_size=kernel_size, padding='same')(branch)
    return Add()([x, branch])

def create_model_resnet(input_shape: tuple, n_classes: int = 2, weights_path: str = None, heads_number=1):
    """Build and compile the small residual network.

    Four stages of widths 32/64/128/256; each stage is a stride-2 convolution
    followed by two residual blocks. The output heads are plain ``Dense``
    layers without activation, i.e. they emit logits, which is what the
    softmax cross-entropy loss used here consumes.

    Args:
        input_shape: Shape of one input image.
        n_classes: Number of units of every output head.
        weights_path: Optional weights file to load before compiling.
        heads_number: Number of parallel output heads.

    Returns:
        The compiled functional ``Model``.
    """
    inputs = Input(shape=input_shape)

    # (filters, kernel size of the stride-2 convolution opening the stage)
    stages = ((32, (5, 5)), (64, (2, 2)), (128, (2, 2)), (256, (2, 2)))

    features = inputs
    for width, down_kernel in stages:
        features = Conv2D(width, kernel_size=down_kernel, strides=(2, 2), activation='relu')(features)
        features = resnet_block(features, width)
        features = resnet_block(features, width)

    features = BatchNormalization(momentum=0.9)(features)
    features = Flatten()(features)
    features = Dense(2048, activation='relu')(features)

    heads = [Dense(n_classes)(features) for _ in range(heads_number)]
    net = Model(inputs=inputs, outputs=heads)

    if weights_path:
        net.load_weights(weights_path)

    # One loss entry per output head.
    net.compile(optimizer=tf.train.AdamOptimizer(),
                loss=[tf.losses.softmax_cross_entropy] * heads_number,
                metrics=['accuracy'])

    return net

In [212]:
# Rebuild the ResNet and restore the weights written by train_net(..., name="resNet").
model = create_model_resnet(input_shape=(IMG_WIDTH, IMG_HEIGHT, 3), weights_path="./models/resNet/model.h5")

In [220]:
plot_history(history_path="./history/resNet.history")
# The networks are trained on pixels scaled to [0, 1], so the raw 0-255 test
# images must be scaled the same way before they are scored.
evaluate(model, X_test / 255.0, y_test)



### AlexNet

In [228]:
def create_model_alexnet(input_shape: tuple, n_classes: int = 2, l2_reg: float = 0.0, weights_path: str = None):
    """Build and compile an AlexNet-style network.

    Five Conv -> BatchNorm -> ReLU stages (four of them followed by 2x2
    max-pooling), two dense blocks with dropout and a batch-normalised
    softmax output.

    Args:
        input_shape: Shape of one input image, passed to the first convolution.
        n_classes: Number of units in the output layer.
        l2_reg: L2 penalty factor. It is applied to the kernel of the first
            convolution only.
        weights_path: Optional weights file to load before compiling.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()

    model.add(Conv2D(32, (5, 5), input_shape=input_shape, padding='same', kernel_regularizer=l2(l2_reg)))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(64, (5, 5), padding='same'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(ZeroPadding2D((1, 1)))
    model.add(Conv2D(128, (3, 3), padding='same'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(ZeroPadding2D((1, 1)))
    model.add(Conv2D(256, (3, 3), padding='same'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))

    model.add(ZeroPadding2D((1, 1)))
    model.add(Conv2D(512, (3, 3), padding='same'))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    model.add(Dense(1024))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.5))

    model.add(Dense(2048))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.5))

    model.add(Dense(n_classes))
    model.add(BatchNormalization())
    model.add(Activation('softmax'))

    if weights_path:
        model.load_weights(weights_path)

    # The network ends in a softmax activation, so the loss must consume
    # probabilities. tf.losses.softmax_cross_entropy expects logits and would
    # apply softmax a second time; use the same setup as create_model_cnn.
    model.compile(optimizer="adam",
                  loss="categorical_crossentropy",
                  metrics=['accuracy'])

    return model

In [229]:
# Rebuild AlexNet and restore the weights written by train_net(..., name="alexNet").
model = create_model_alexnet(input_shape=(IMG_WIDTH, IMG_HEIGHT, 3), weights_path="./models/alexNet/model.h5")

In [230]:
plot_history(history_path="./history/alexNet.history")
# The networks are trained on pixels scaled to [0, 1], so the raw 0-255 test
# images must be scaled the same way before they are scored.
evaluate(model, X_test / 255.0, y_test)



### VGG16 pretrained

In [73]:
def create_model_vgg(input_shape: tuple, weights_path: str = None, n_classes: int = 2):
    """Build and compile a classifier on top of a frozen VGG16 convolutional base.

    Args:
        input_shape: Shape of one input image.
        weights_path: Optional weights file for the whole model (base and
            head) to load before compiling.
        n_classes: Number of units in the softmax output layer.

    Returns:
        The compiled functional ``Model``.
    """
    # When a weights file is supplied it is loaded into the whole model below,
    # so the ImageNet weights would be downloaded only to be overwritten.
    # NOTE(review): assumes weights_path is a full-model file such as the one
    # written by train_net - confirm if other files are ever passed.
    vgg = VGG16(input_shape=input_shape,
                weights=None if weights_path else 'imagenet',
                include_top=False)

    # Freeze the convolutional base; only the new head is trainable.
    for layer in vgg.layers:
        layer.trainable = False

    x = vgg.output
    x = Flatten()(x)
    x = Dense(1024, activation="relu")(x)
    x = BatchNormalization()(x)
    x = Dense(1024, activation="relu")(x)
    predictions = Dense(n_classes, activation="softmax")(x)

    model = Model(inputs=vgg.input, outputs=predictions)

    if weights_path:
        model.load_weights(weights_path)

    model.compile(loss="categorical_crossentropy",
                  optimizer="adam",
                  metrics=["accuracy"])

    return model

In [232]:
# Rebuild the VGG16-based model and restore the weights written by train_net(..., name="vggPretrained").
model = create_model_vgg(input_shape=(IMG_WIDTH, IMG_HEIGHT, 3), weights_path="./models/vggPretrained/model.h5")

In [233]:
plot_history(history_path="./history/vggPretrained.history")
# The networks are trained on pixels scaled to [0, 1], so the raw 0-255 test
# images must be scaled the same way before they are scored.
evaluate(model, X_test / 255.0, y_test)



# Train

In [15]:
def shuffle_data(X: np.ndarray, y: np.array) -> Tuple[np.ndarray, np.array]:
    """Return ``X`` and ``y`` reordered by one shared random permutation.

    Args:
        X: Samples, indexed along the first axis.
        y: Targets with the same first-axis length as ``X``.

    Returns:
        ``(X, y)`` shuffled in unison; the inputs are not modified.
    """
    n_samples = X.shape[0]
    assert n_samples == y.shape[0]

    # A single permutation applied to both arrays keeps every sample paired
    # with its target.
    order = np.random.permutation(n_samples)
    return X[order], y[order]

In [16]:
def prepare_data(*data: tuple) -> tuple:
    """Scale the images to [0, 1], one-hot encode the labels and shuffle both splits.

    Note: this reuses the name of the image-loading ``prepare_data`` defined in
    the "Data Preparation" section and shadows it once this cell has run.

    Args:
        *data: Exactly ``X_train, X_test, y_train, y_test``, in that order.

    Returns:
        ``(X_train_norm, X_test_norm, y_train_onehot, y_test_onehot)``.
    """
    X_train, X_test, y_train, y_test = data

    # Pixel values are divided by 255; labels become two-column one-hot rows.
    train_images, test_images = X_train / 255.0, X_test / 255.0
    train_targets, test_targets = to_categorical(y_train, 2), to_categorical(y_test, 2)

    # Each split is shuffled on its own, images and targets in unison.
    train_images, train_targets = shuffle_data(train_images, train_targets)
    test_images, test_targets = shuffle_data(test_images, test_targets)

    return train_images, test_images, train_targets, test_targets

In [17]:
def train_net(model, *data: tuple, name: str, epochs = 40, batch_size=100):
    """Fit ``model``, then save the model and its training history to disk.

    Writes ``./models/<name>/model.h5`` and ``./history/<name>.history`` (a
    pickled ``history.history`` dict), creating the directories if needed.

    Args:
        model: Compiled model exposing ``fit`` and ``save``.
        *data: Exactly ``X_train, X_test, y_train, y_test``, already
            preprocessed; the test split is used as validation data.
        name: Identifier used in the output paths.
        epochs: Number of training epochs.
        batch_size: Mini-batch size.
    """
    X_train, X_test, y_train, y_test = data

    # Train on the arrays that were passed in. The notebook globals
    # X_train_norm / y_train_onehot were used here before, which silently
    # ignored the arguments.
    history = model.fit(X_train, y_train,
                        validation_data=(X_test, y_test),
                        batch_size=batch_size, epochs=epochs)

    # Make sure the output folders exist so a long training run is not lost
    # to a missing directory at save time.
    model_dir = f"./models/{name}"
    os.makedirs(model_dir, exist_ok=True)
    os.makedirs("./history", exist_ok=True)

    model.save(f"{model_dir}/model.h5")
    with open(f"./history/{name}.history", "wb") as f:
        pickle.dump(history.history, f)

In [20]:
# Normalise, one-hot encode and shuffle the raw arrays; the raw X_test / y_test stay untouched.
X_train_norm, X_test_norm, y_train_onehot, y_test_onehot = prepare_data(X_train, X_test, y_train, y_test)
# Bundle in the positional order that train_net unpacks: X_train, X_test, y_train, y_test.
data = X_train_norm, X_test_norm, y_train_onehot, y_test_onehot

In [178]:
# Train the baseline CNN from scratch; train_net saves ./models/CNN/model.h5 and ./history/CNN.history.
model = create_model_cnn(input_shape=(IMG_WIDTH, IMG_HEIGHT, 3), n_classes=2)
train_net(model, *data, name="CNN", epochs=10, batch_size=50)

Train on 10682 samples, validate on 3562 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [179]:
# Train the deep CNN from scratch; train_net saves ./models/deepCNN/model.h5 and ./history/deepCNN.history.
model = create_model_deep_cnn(input_shape=(IMG_WIDTH, IMG_HEIGHT, 3), n_classes=2)
train_net(model, *data, name="deepCNN", epochs=10, batch_size=50)

Train on 10682 samples, validate on 3562 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [180]:
# Train the ResNet from scratch; train_net saves ./models/resNet/model.h5 and ./history/resNet.history.
model = create_model_resnet(input_shape=(IMG_WIDTH, IMG_HEIGHT, 3), n_classes=2)
train_net(model, *data, name="resNet", epochs=10, batch_size=50)

Train on 10682 samples, validate on 3562 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [183]:
# Train AlexNet from scratch (l2_reg only affects the first convolution's kernel);
# train_net saves ./models/alexNet/model.h5 and ./history/alexNet.history.
model = create_model_alexnet(input_shape=(IMG_WIDTH, IMG_HEIGHT, 3), n_classes=2, l2_reg=0.5)
train_net(model, *data, name="alexNet", epochs=10, batch_size=50)

Train on 10682 samples, validate on 3562 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [75]:
# Train the head on top of the frozen ImageNet VGG16 base;
# train_net saves ./models/vggPretrained/model.h5 and ./history/vggPretrained.history.
model = create_model_vgg(input_shape=(IMG_WIDTH, IMG_HEIGHT, 3))
train_net(model, *data, name="vggPretrained", epochs=10, batch_size=40)

Train on 10682 samples, validate on 3562 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
