The previous notebook (inception_resnet_v2_self_trained_on_200x150) created "baseline models" that I can improve on. For example, I haven’t used the tabular data, nor have I used any image augmentation. Also, the original architecture prefers square images, but I’m giving it rectangular ones. An ROI detection network will have to be trained to locate the main region of the lesion so that the image can be cropped accordingly. These points will be addressed next; this notebook is about improving the results by using the tabular data.

In [10]:
import os
import absl.logging

import numpy as np
import pandas as pd
import tensorflow as tf

from tensorflow import keras
from tensorflow.keras.applications.inception_resnet_v2 import InceptionResNetV2

# Restrict absl logging to records of severity ERROR and above.
# NOTE(review): presumably this is meant to quiet the warnings emitted while Keras saves
# checkpoints during training — confirm.
absl.logging.set_verbosity(absl.logging.ERROR)

In [11]:
def get_names(root_path):
    """Collect the base names of all files found anywhere under ``root_path``.

    The directory tree is walked recursively with ``os.walk``. For every file, the part of
    its name before the first ``.`` is kept (e.g. ``'ISIC_1.jpg'`` -> ``'ISIC_1'``).

    Args:
        root_path: Directory to scan.

    Returns:
        A list of file base names in ``os.walk`` order; duplicates are kept.
    """
    return [
        file_name.split('.')[0]
        for _, _, file_names in os.walk(root_path)
        for file_name in file_names
    ]


# Locations of the pre-split image folders, relative to the notebook's working directory.
base_dir = os.path.join('..', 'data', 'images_original_inception_resnet_v2_200x150_splitted')
train_dir = os.path.join(base_dir, 'training')
valid_dir = os.path.join(base_dir, 'validation')

Creating the tabular data:

In [12]:
metadata_path = os.path.join('..', 'data', 'HAM10000_metadata.csv')
data = pd.read_csv(metadata_path)
# Fill missing ages with the mean age of the rows that share the same 'sex' value.
data['age'] = data.groupby('sex')['age'].transform(lambda x: x.fillna(x.mean()))
# Declare the category sets on the FULL table before splitting. pd.get_dummies emits one
# column per declared category of a categorical column, so the training and validation
# encodings always have the same columns in the same order, even when a value is absent
# from one of the splits. (Encoding each split's raw strings independently would silently
# produce differently shaped / misaligned one-hot matrices in that case.)
categorical_cols = ['dx', 'sex', 'localization']
data = data.astype({col: 'category' for col in categorical_cols})
train_names = set(get_names(train_dir))
valid_names = set(get_names(valid_dir))
relevant_cols = ['dx', 'age', 'sex', 'localization']
# Rows are ordered by image_id so that they can be paired positionally with the images.
# NOTE(review): this relies on the image datasets listing files in the same order — verify,
# especially if the split folders contain sub-directories.
train_df = data[data['image_id'].isin(train_names)].sort_values(by='image_id')[relevant_cols]
valid_df = data[data['image_id'].isin(valid_names)].sort_values(by='image_id')[relevant_cols]
# Age becomes a single integer column (the cast truncates mean-imputed values).
train_age = np.expand_dims(train_df['age'].astype(int).to_numpy(), -1)
train_sex_categories = pd.get_dummies(train_df['sex']).to_numpy()
train_localization_categories = pd.get_dummies(train_df['localization']).to_numpy()
valid_age = np.expand_dims(valid_df['age'].astype(int).to_numpy(), -1)
valid_sex_categories = pd.get_dummies(valid_df['sex']).to_numpy()
valid_localization_categories = pd.get_dummies(valid_df['localization']).to_numpy()
# Feature matrix layout: [age | one-hot sex | one-hot localization]; labels are one-hot 'dx'.
X_train = np.hstack((train_age, train_sex_categories, train_localization_categories))
y_train = pd.get_dummies(train_df['dx'])
X_valid = np.hstack((valid_age, valid_sex_categories, valid_localization_categories))
y_valid = pd.get_dummies(valid_df['dx'])

# Sanity checks: both splits must share one feature layout and one label layout.
assert X_train.shape[1] == X_valid.shape[1], 'train/validation feature columns differ'
assert list(y_train.columns) == list(y_valid.columns), 'train/validation label columns differ'

Creating the image data:

In [13]:
def preprocess_dataset(dataset: tf.data.Dataset) -> tf.data.Dataset:
    """Return ``dataset`` with every element multiplied by 1/255.

    Args:
        dataset: Dataset whose elements are image tensors (no labels).

    Returns:
        A new dataset produced by mapping a ``Rescaling(1./255)`` layer over ``dataset``.
    """
    to_unit_scale = keras.layers.Rescaling(1./255)

    def _scale(image):
        return to_unit_scale(image)

    return dataset.map(_scale)


def _load_unlabeled_images(directory: str) -> tf.data.Dataset:
    """Load the images under ``directory`` as an unshuffled, unlabeled, batched dataset."""
    return keras.utils.image_dataset_from_directory(
        directory,
        labels=None,  # no labels here: they are taken care of separately
        label_mode=None,
        image_size=(150, 200),
        batch_size=64,
        shuffle=False)  # keep a fixed file order so images can be paired with other data


raw_train_images = _load_unlabeled_images(train_dir)
raw_valid_images = _load_unlabeled_images(valid_dir)
train_generator = preprocess_dataset(raw_train_images)
valid_generator = preprocess_dataset(raw_valid_images)

Found 8015 files belonging to 1 classes.
Found 2000 files belonging to 1 classes.


In [14]:
# Tabular features and one-hot labels, batched with the same batch size (64) as the image
# datasets so that zipping pairs batch i of the images with batch i of the metadata.
train_metadata_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train))
train_metadata_ds = train_metadata_ds.batch(64)
valid_metadata_ds = tf.data.Dataset.from_tensor_slices((X_valid, y_valid))
valid_metadata_ds = valid_metadata_ds.batch(64)

# Each zipped element is (image_batch, (tabular_batch, label_batch)).
train_dataset = tf.data.Dataset.zip((train_generator, train_metadata_ds))
valid_dataset = tf.data.Dataset.zip((valid_generator, valid_metadata_ds))


def restructure(image_data, tabular_data_and_labels):
    """Regroup a zipped element into the ``(inputs, labels)`` form.

    Args:
        image_data: The image part of the element.
        tabular_data_and_labels: A 2-tuple ``(tabular_data, labels)``.

    Returns:
        ``((image_data, tabular_data), labels)``.
    """
    features, targets = tabular_data_and_labels
    model_inputs = (image_data, features)

    return model_inputs, targets


# Turn each zipped element into ((image, tabular), labels) for both splits.
train_dataset, valid_dataset = (
    zipped.map(restructure) for zipped in (train_dataset, valid_dataset)
)

In [16]:
# Image size fed to the network: 600 and 450 divided by three (200 wide, 150 high).
SMALLER_WIDTH, SMALLER_HEIGHT = 600 // 3, 450 // 3
# Number of columns in the tabular feature matrix; sizes the model's tabular input.
METADATA_COLS_COUNT = X_train.shape[1]


def get_model() -> keras.Model:
    """Build and compile the two-input (image + tabular) classifier.

    The image branch is an InceptionResNetV2 without its top and without pretrained
    weights (``weights=None``), followed by dropout, global average pooling and two
    512-unit Dense+PReLU stages. The tabular branch is two 64-unit Dense+PReLU stages on
    an input of ``METADATA_COLS_COUNT`` columns. Both branches are concatenated and fed to
    a 7-way softmax layer.

    Returns:
        The model, compiled with the Adam optimizer, categorical cross-entropy loss and
        the accuracy metric. Its inputs are ``[image_input, tab_input]``.
    """
    def dense_prelu(tensor, units):
        # A Dense layer (no activation argument) followed by a PReLU layer.
        tensor = keras.layers.Dense(units)(tensor)
        return keras.layers.PReLU()(tensor)

    # Layers are created in the same order as before so auto-generated layer names stay stable.
    backbone = InceptionResNetV2(
        include_top=False,
        weights=None,
        input_shape=(SMALLER_HEIGHT, SMALLER_WIDTH, 3))

    # Image branch.
    image_features = keras.layers.Dropout(.4)(backbone.output)
    image_features = keras.layers.GlobalAveragePooling2D()(image_features)
    image_features = dense_prelu(image_features, 512)
    image_features = keras.layers.Dropout(.4)(image_features)
    image_features = dense_prelu(image_features, 512)

    # Tabular branch.
    metadata_input = keras.Input(shape=(METADATA_COLS_COUNT,), name='tab_input')
    metadata_features = dense_prelu(metadata_input, 64)
    metadata_features = dense_prelu(metadata_features, 64)

    # Final dropout on the image features only, then fuse both branches.
    image_features = keras.layers.Dropout(.2)(image_features)
    merged = keras.layers.concatenate([image_features, metadata_features])
    class_probabilities = keras.layers.Dense(7, activation='softmax')(merged)

    network = keras.Model(inputs=[backbone.input, metadata_input], outputs=class_probabilities)
    network.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    return network

In [17]:
def run_model(model_factory, model_name: str) -> None:
    """Build a model with ``model_factory`` and fit it on the notebook's datasets.

    Training uses the module-level ``train_dataset`` and ``valid_dataset`` for at most 50
    epochs with three callbacks: early stopping on ``val_loss`` (patience 10), a
    best-only checkpoint written under ``models/<model_name><epoch>``, and TensorBoard
    logs under ``tensor_logs/<model_name>``.

    Args:
        model_factory: Zero-argument callable returning a compiled model.
        model_name: Name used in the checkpoint path and the TensorBoard log directory.
    """
    checkpoint_path = 'models/' + model_name + '{epoch}'
    training_callbacks = [
        keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, min_delta=1e-6),
        keras.callbacks.ModelCheckpoint(filepath=checkpoint_path, save_best_only=True),
        keras.callbacks.TensorBoard(log_dir=f'tensor_logs/{model_name}'),
    ]

    network = model_factory()
    network.fit(
        train_dataset,
        validation_data=valid_dataset,
        epochs=50,
        callbacks=training_callbacks)

In [18]:
# Train the image + tabular model. The name is a plain string literal: it contains no
# placeholders, so no f-string prefix is needed.
run_model(get_model, 'images_original_inception_resnet_v2_200x150_with_tabular')

Epoch 1/50


INFO:tensorflow:Assets written to: models\images_original_inception_resnet_v2_200x150_with_tabular1\assets


Epoch 2/50
Epoch 3/50


INFO:tensorflow:Assets written to: models\images_original_inception_resnet_v2_200x150_with_tabular3\assets


Epoch 4/50


INFO:tensorflow:Assets written to: models\images_original_inception_resnet_v2_200x150_with_tabular4\assets


Epoch 5/50


INFO:tensorflow:Assets written to: models\images_original_inception_resnet_v2_200x150_with_tabular5\assets


Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50


INFO:tensorflow:Assets written to: models\images_original_inception_resnet_v2_200x150_with_tabular9\assets


Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50


INFO:tensorflow:Assets written to: models\images_original_inception_resnet_v2_200x150_with_tabular17\assets


Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50


INFO:tensorflow:Assets written to: models\images_original_inception_resnet_v2_200x150_with_tabular21\assets


Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50


INFO:tensorflow:Assets written to: models\images_original_inception_resnet_v2_200x150_with_tabular27\assets


Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
