# 1. Initialize

In [None]:
from numpy.random import seed
seed(1)
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from glob import glob
from PIL import Image
from sklearn.model_selection import train_test_split

import keras
from keras.applications import Xception
from keras.models import Sequential, Model
from keras.layers import Activation,Dense, Dropout, Flatten, Conv2D, MaxPool2D,AveragePooling2D,GlobalMaxPooling2D
from keras import backend as K
from keras.wrappers.scikit_learn import KerasClassifier
from keras.layers.normalization import BatchNormalization
from keras.utils.np_utils import to_categorical # convert to one-hot-encoding
from keras import regularizers
from keras.optimizers import Adam, SGD
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau, EarlyStopping

np.random.seed(123)

import gc
import psutil
from tqdm import tqdm

# Handle on the current Python process, used for memory reporting.
process = psutil.Process(os.getpid())


def print_current_ram():
    """Print the resident set size of this process in (decimal) gigabytes."""
    rss_bytes = process.memory_info().rss
    print(rss_bytes / 1000000000, 'GB')

# 2. Data

## Ground Truth

In [None]:
# Ground-truth table: one row per image (indexed by image id), one column per class.
ground_truth_csv = '../input/isic-2019/ISIC_2019_Training_GroundTruth.csv'
df_gt = pd.read_csv(ground_truth_csv, index_col='image')
df_gt

## UQ Test Set

In [None]:
# Hand-picked hold-out images, grouped by lesion name. These ids are removed
# from the training table and used for the final per-lesion test.
uq_test_set = {
    'Melanocytic naevi': ['ISIC_0073202', 'ISIC_0073207', 'ISIC_0073208', 'ISIC_0073212', 'ISIC_0073219', 'ISIC_0073220', 'ISIC_0073222', 'ISIC_0073232', 'ISIC_0073240', 'ISIC_0073245'],
    'Vascular skin lesions': ['ISIC_0071787', 'ISIC_0071912', 'ISIC_0072012', 'ISIC_0072430', 'ISIC_0072479', 'ISIC_0072651', 'ISIC_0072919', 'ISIC_0072964', 'ISIC_0073031', 'ISIC_0073110'],
    'Dermatofibroma': ['ISIC_0071844', 'ISIC_0071858', 'ISIC_0071998', 'ISIC_0072033', 'ISIC_0072045', 'ISIC_0072137', 'ISIC_0072193', 'ISIC_0073112', 'ISIC_0073189', 'ISIC_0073193'],
    'Benign keratosis': ['ISIC_1897507', 'ISIC_2140099', 'ISIC_2371734', 'ISIC_3409440', 'ISIC_4354896', 'ISIC_5215191', 'ISIC_5407240', 'ISIC_5958409', 'ISIC_6511141', 'ISIC_6594555'],
    'Actinic keratosis': ['ISIC_0072940', 'ISIC_0072986', 'ISIC_0072992', 'ISIC_0073045', 'ISIC_0073068', 'ISIC_0073130', 'ISIC_0073153', 'ISIC_0073157', 'ISIC_0073214', 'ISIC_0073224'],
    'Basal cell carcinoma': ['ISIC_0073155', 'ISIC_0073161', 'ISIC_0073170', 'ISIC_0073172', 'ISIC_0073196', 'ISIC_0073200', 'ISIC_0073221', 'ISIC_0073225', 'ISIC_0073229', 'ISIC_0073246'],
    'Melanoma': ['ISIC_0073054', 'ISIC_0073065', 'ISIC_0073075', 'ISIC_0073097', 'ISIC_0073102', 'ISIC_0073115', 'ISIC_0073119', 'ISIC_0073127', 'ISIC_0073136', 'ISIC_0073143', 'ISIC_0073147', 'ISIC_0073156', 'ISIC_0073168', 'ISIC_0073173', 'ISIC_0073194', 'ISIC_0073203', 'ISIC_0073210', 'ISIC_0073218', 'ISIC_0073227', 'ISIC_0073231', 'ISIC_0073237', 'ISIC_0073238', 'ISIC_0073241', 'ISIC_0073249']
}

# Flat list of every hold-out id, in dictionary (insertion) order.
uq_test_ids = []
for _lesion_ids in uq_test_set.values():
    uq_test_ids.extend(_lesion_ids)

## Remove Test Set from Training Set

In [None]:
# Drop the hold-out rows from the ground-truth table; ids that are not present
# in the table are silently skipped (errors='ignore').
df_gt_remove_uq_test_set = df_gt.drop(index=uq_test_ids, errors='ignore')
display(df_gt_remove_uq_test_set)

removed_count = len(df_gt) - len(df_gt_remove_uq_test_set)
print('Number of data removed:', removed_count)

## Training Data

In [None]:
# Target image size in pixels: imx is the width, imy the height
# (PIL's resize takes (width, height); the model input is (imy, imx, 3)).
imx = 128
imy = 128

In [None]:
# Load every remaining training image, resized to (imx, imy), into one array.
image_isic = []
for idx in tqdm(df_gt_remove_uq_test_set.index):
    input_file_name = '../input/isic-2019/ISIC_2019_Training_Input/ISIC_2019_Training_Input/' + idx + '.jpg'
    # The context manager closes the file handle deterministically instead of
    # leaving thousands of open files to the garbage collector.
    with Image.open(input_file_name) as source_image:
        # Force 3 channels: a single grayscale/CMYK/RGBA file would otherwise
        # give a differently shaped array and break the stacking below.
        resized = source_image.convert('RGB').resize((imx, imy))
        image_isic.append(np.asarray(resized))

# Stack into a single (n_images, imy, imx, 3) array.
x_isic = np.asarray(image_isic)

# Free the per-image list; the stacked array holds the data now.
del image_isic; gc.collect()

In [None]:
# Label matrix: the ground-truth table's class columns as a plain NumPy array,
# row-aligned with the images loaded from the same index.
y_isic = df_gt_remove_uq_test_set.to_numpy()

In [None]:
# Hold out 10% of the data for validation; the fixed seed keeps the split reproducible.
x_train, x_validate, y_train, y_validate = train_test_split(
    x_isic,
    y_isic,
    test_size=0.1,
    random_state=123,
)

# The full arrays are no longer needed once the split copies exist.
del x_isic, y_isic
gc.collect()

# 3. Model

In [None]:
# Model input is (height, width, channels); one output unit per ground-truth column.
input_shape = (imy, imx, 3)
num_classes = 9

optimizer = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)

epochs = 200
batch_size = 20

# Both callbacks must watch the same logged metric. The model is compiled with
# metrics=["accuracy"], so the validation key is 'val_accuracy'. The previous
# 'val_acc' did not match what EarlyStopping monitors, so the learning-rate
# schedule would never trigger.
monitored_metric = 'val_accuracy'

# Halve the learning rate after 5 epochs without improvement, down to 1e-5.
learning_rate_reduction = ReduceLROnPlateau(monitor=monitored_metric, patience=5, verbose=1, factor=0.5, min_lr=0.00001)
# Stop after 20 epochs without improvement and roll back to the best weights.
early_stopping_monitor = EarlyStopping(patience=20, monitor=monitored_metric, restore_best_weights=True)

# Augmentation only: no normalisation or whitening is applied, so training
# sees raw pixel values, as do validation and test.
datagen = ImageDataGenerator(
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
    rotation_range=90,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    vertical_flip=True,
    shear_range=10)

In [None]:
# ImageNet-pretrained Xception backbone without its classification head.
training_shape = (imy, imx, 3)
base_model = Xception(include_top=False, weights='imagenet', input_shape=training_shape)

# Classification head: flatten -> BN -> Dense(128) -> Dropout -> BN -> softmax.
head = Flatten()(base_model.output)

head = BatchNormalization()(head)
head = Dense(128, activation='relu')(head)
head = Dropout(0.2)(head)

head = BatchNormalization()(head)
XCeptionoutput = Dense(num_classes, activation='softmax')(head)

# Full network from the backbone's input to the new softmax output.
XCeptionmodel = Model(inputs=base_model.input, outputs=XCeptionoutput)
model = XCeptionmodel

# Fine-tune the whole backbone rather than freezing it.
for backbone_layer in base_model.layers:
    backbone_layer.trainable = True

In [None]:
# Compile for multi-class classification with one-hot labels.
model.compile(
    loss="categorical_crossentropy",
    optimizer=optimizer,
    metrics=["accuracy"],
)

# Augmented training batches; one epoch covers the training set once (floor division).
train_batches = datagen.flow(x_train, y_train, batch_size=batch_size)
steps_per_epoch = x_train.shape[0] // batch_size

history = model.fit(
    train_batches,
    epochs=epochs,
    validation_data=(x_validate, y_validate),
    verbose=1,
    steps_per_epoch=steps_per_epoch,
    callbacks=[learning_rate_reduction, early_stopping_monitor],
)

In [None]:
# Loss and accuracy on the held-out validation split (shown as the cell output).
model.evaluate(x=x_validate, y=y_validate, verbose=1)

# 4. Test

In [None]:
# Maps the model's output index to a lesion name. The order is assumed to
# follow the class columns of the ground-truth CSV used to build the labels
# - verify against df_gt.columns.
train_idx_to_legion = {
    0: 'Melanoma',
    1: 'Melanocytic naevi',
    2: 'Basal cell carcinoma',
    3: 'Actinic keratosis',
    4: 'Benign keratosis',
    5: 'Dermatofibroma',
    6: 'Vascular skin lesions',
    7: 'SCC',
    8: 'UNK'
}

# Column layout of the per-lesion result table: predicted label, then one
# probability column per class.
result_columns = ['[Result]'] + list(train_idx_to_legion.values())

for lesion in uq_test_set:
    print('===', lesion, '===')

    image_ids = uq_test_set[lesion]

    # The source directory depends only on the lesion, so resolve it once per
    # lesion rather than once per image.
    test_src_dir = '../input/siim-isic-melanoma-classification/jpeg/train/' if lesion == 'Benign keratosis' else '../input/isic-2019/ISIC_2019_Training_Input/ISIC_2019_Training_Input/'

    # Load and resize all images of this lesion. The context manager closes
    # each file, and convert('RGB') guarantees the 3 channels the model expects.
    test_images = []
    for idx in image_ids:
        with Image.open(test_src_dir + idx + '.jpg') as source_image:
            test_images.append(np.asarray(source_image.convert('RGB').resize((imx, imy))))

    # One batched forward pass per lesion instead of one predict call per image.
    test_x = np.asarray(test_images)
    test_y = model.predict(test_x)

    predictions = [train_idx_to_legion[class_idx] for class_idx in test_y.argmax(axis=1)]

    # Build the result table in one go rather than growing it row by row.
    rows = [[predict] + list(probabilities) for predict, probabilities in zip(predictions, test_y)]
    df = pd.DataFrame(rows, index=image_ids, columns=result_columns)
    display(df)

    success_num = sum(1 for predict in predictions if predict == lesion)
    total_num = len(image_ids)
    print(f'-> {success_num} / {total_num} = {success_num / total_num * 100} %\n')

# 5. References

This notebook is based on https://www.kaggle.com/jnegrini/ham10000-analysis-and-model-comparison