# Dermatologist AI mini project from Udacity
This mini project visually diagnoses melanoma, nevus and seborrheic keratosis, which is from [Udacity Deep Learning Nanodegree](https://www.udacity.com/course/deep-learning-nanodegree--nd101).<br>
This doesn't include data set for training because it doesn't belong to me, but the data and objective are pulled from the [2017 ISIC Challenge on Skin Lesion Analysis Towards Melanoma Detection](https://challenge.kitware.com/#challenge/583f126bcad3a51cc66c8d9a).<br>
https://isic-archive.com/#images <br>
(Because of 'Memory Error' issue, I delete python objects from time to time in this notebook)

## Import Datasets

In [1]:
from sklearn.datasets import load_files   
from keras.utils import np_utils
import numpy as np
from glob import glob

def load_dataset(data_path, shuffle=None):
    kwargs = {}
    if shuffle != None:
        kwargs['shuffle'] = shuffle
    data = load_files(data_path, **kwargs)
    img_files = np.array(data['filenames'])
    targets = np_utils.to_categorical(np.array(data['target']), 3)
    return img_files, targets

train_files, train_targets = load_dataset('data/train')
valid_files, valid_targets = load_dataset('data/valid')
test_files, test_targets = load_dataset('data/test', shuffle=False)

# load lables
label_name = [item[11:-1] for item in sorted(glob("data/train/*/"))]

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
print('train_files size: {}'.format(len(train_files)))
print('train_files shape: {}'.format(train_files.shape))
print('target shape: {}'.format(train_targets.shape))
print(label_name)

train_files size: 2000
train_files shape: (2000,)
target shape: (2000, 3)
['melanoma', 'nevus', 'seborrheic_keratosis']


In [3]:
from keras.preprocessing import image
from keras.applications.inception_resnet_v2 import preprocess_input
from tqdm import tqdm


def path_to_tensor(img_path):
    img = image.load_img(img_path, target_size=(384, 256))
    x = image.img_to_array(img)
    return np.expand_dims(x, axis=0)

def paths_to_tensor(image_paths):
    return np.vstack([path_to_tensor(path) for path in image_paths])


## Network with transfer learning
### Loading images into tensors

In [4]:
train_tensors = paths_to_tensor(tqdm(train_files))
valid_tensors = paths_to_tensor(tqdm(valid_files))
test_tensors = paths_to_tensor(tqdm(test_files))

print(train_tensors.shape)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2000/2000 [04:18<00:00,  7.74it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 150/150 [00:32<00:00,  4.57it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 600/600 [03:17<00:00,  3.04it/s]


(2000, 384, 256, 3)


### Image augmentation
To enlarge train data I tried image augmentation using flip.
At first I tried to transform using flip, zoom and shift, but I thought it can twist training features much because medical images are very sensitive, so I tried to use flip only.
However __image transform didn't help this network and dataset, so I disabled it in this project__.

In [5]:
import matplotlib.pyplot as plt
from keras.preprocessing.image import ImageDataGenerator


apply_train_image_transform = False

if apply_train_image_transform:
    # Caution: Doesn't guarantee prevention of duplication.
    datagen_train = ImageDataGenerator(
        horizontal_flip=True,
        vertical_flip=True)
    
    datagen_train.fit(train_tensors)
    shape = (train_tensors.shape[0] * 2,) + train_tensors.shape[1:]
    generated = np.ndarray(shape=shape)
    for i, image in tqdm(enumerate(train_tensors)):
        generated[i] = datagen_train.random_transform(image)
    
    train_tensors = np.concatenate((train_tensors, generated))
    train_targets = train_targets.repeat(2, axis=0)

### Transfer learning using Inception Resnet V2

In [6]:
# train_imgs_preprocess is created below
train_imgs_preprocess = preprocess_input(train_tensors)
valid_imgs_preprocess = preprocess_input(valid_tensors)
test_imgs_preprocess = preprocess_input(test_tensors)
del train_tensors, valid_tensors, test_tensors

In [7]:
from keras.applications.inception_resnet_v2 import InceptionResNetV2
transfer_model = InceptionResNetV2(include_top=False)

train_data = transfer_model.predict(train_imgs_preprocess)
valid_data = transfer_model.predict(valid_imgs_preprocess)
test_data = transfer_model.predict(test_imgs_preprocess)

del train_imgs_preprocess, valid_imgs_preprocess, test_imgs_preprocess
print(train_data.shape)

(2000, 10, 6, 1536)


In [8]:
import gc
gc.collect()

358680

### Network Model

In [9]:
from keras.layers import Conv2D, Dropout, Flatten, Dense, MaxPooling2D, GlobalAveragePooling2D
from keras.models import Sequential

my_model = Sequential()

my_model.add(GlobalAveragePooling2D(input_shape=train_data.shape[1:]))
my_model.add(Dropout(0.2))
my_model.add(Dense(1024, activation='relu'))
my_model.add(Dropout(0.2))

my_model.add(Dense(3, activation='softmax'))

my_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
global_average_pooling2d_1 ( (None, 1536)              0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 1536)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 1024)              1573888   
_________________________________________________________________
dropout_2 (Dropout)          (None, 1024)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 3)                 3075      
Total params: 1,576,963
Trainable params: 1,576,963
Non-trainable params: 0
_________________________________________________________________


In [10]:
my_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [11]:
from keras.callbacks import ModelCheckpoint


checkpoint_filepath = 'saved_models/weights.best.my.hdf5'

my_checkpointer = ModelCheckpoint(filepath=checkpoint_filepath,
                               verbose=1, save_best_only=True)

my_model.fit(train_data, train_targets, 
          validation_data=(valid_data, valid_targets),
          epochs=60, batch_size=200, callbacks=[my_checkpointer], verbose=1)

Train on 2000 samples, validate on 150 samples
Epoch 1/60

Epoch 00001: val_loss improved from inf to 1.22366, saving model to saved_models/weights.best.my.hdf5
Epoch 2/60

Epoch 00002: val_loss improved from 1.22366 to 0.91720, saving model to saved_models/weights.best.my.hdf5
Epoch 3/60

Epoch 00003: val_loss improved from 0.91720 to 0.86689, saving model to saved_models/weights.best.my.hdf5
Epoch 4/60

Epoch 00004: val_loss improved from 0.86689 to 0.81657, saving model to saved_models/weights.best.my.hdf5
Epoch 5/60

Epoch 00005: val_loss did not improve from 0.81657
Epoch 6/60

Epoch 00006: val_loss did not improve from 0.81657
Epoch 7/60

Epoch 00007: val_loss improved from 0.81657 to 0.77670, saving model to saved_models/weights.best.my.hdf5
Epoch 8/60

Epoch 00008: val_loss did not improve from 0.77670
Epoch 9/60

Epoch 00009: val_loss did not improve from 0.77670
Epoch 10/60

Epoch 00010: val_loss did not improve from 0.77670
Epoch 11/60

Epoch 00011: val_loss did not improve 

Epoch 00020: val_loss did not improve from 0.71352
Epoch 21/60

Epoch 00021: val_loss did not improve from 0.71352
Epoch 22/60

Epoch 00022: val_loss did not improve from 0.71352
Epoch 23/60

Epoch 00023: val_loss improved from 0.71352 to 0.69920, saving model to saved_models/weights.best.my.hdf5
Epoch 24/60

Epoch 00024: val_loss did not improve from 0.69920
Epoch 25/60

Epoch 00025: val_loss improved from 0.69920 to 0.68075, saving model to saved_models/weights.best.my.hdf5
Epoch 26/60

Epoch 00026: val_loss did not improve from 0.68075
Epoch 27/60

Epoch 00027: val_loss did not improve from 0.68075
Epoch 28/60

Epoch 00028: val_loss did not improve from 0.68075
Epoch 29/60

Epoch 00029: val_loss did not improve from 0.68075
Epoch 30/60

Epoch 00030: val_loss did not improve from 0.68075
Epoch 31/60

Epoch 00031: val_loss did not improve from 0.68075
Epoch 32/60

Epoch 00032: val_loss did not improve from 0.68075
Epoch 33/60

Epoch 00033: val_loss did not improve from 0.68075
Epoch 3


Epoch 00041: val_loss did not improve from 0.66539
Epoch 42/60

Epoch 00042: val_loss did not improve from 0.66539
Epoch 43/60

Epoch 00043: val_loss did not improve from 0.66539
Epoch 44/60

Epoch 00044: val_loss did not improve from 0.66539
Epoch 45/60

Epoch 00045: val_loss did not improve from 0.66539
Epoch 46/60

Epoch 00046: val_loss did not improve from 0.66539
Epoch 47/60

Epoch 00047: val_loss did not improve from 0.66539
Epoch 48/60

Epoch 00048: val_loss did not improve from 0.66539
Epoch 49/60

Epoch 00049: val_loss did not improve from 0.66539
Epoch 50/60

Epoch 00050: val_loss did not improve from 0.66539
Epoch 51/60

Epoch 00051: val_loss did not improve from 0.66539
Epoch 52/60

Epoch 00052: val_loss did not improve from 0.66539
Epoch 53/60

Epoch 00053: val_loss did not improve from 0.66539
Epoch 54/60

Epoch 00054: val_loss did not improve from 0.66539
Epoch 55/60

Epoch 00055: val_loss did not improve from 0.66539
Epoch 56/60

Epoch 00056: val_loss did not improve f

<keras.callbacks.History at 0x292d240f6a0>

In [12]:
my_model.load_weights(checkpoint_filepath)

### Evaluation

In [13]:
import csv


my_predictions = [my_model.predict(np.expand_dims(feature, axis=0)) for feature in test_data]

# test_accuracy = 100 * np.sum(np.array(my_predictions)==np.argmax(test_targets, axis=1)) / len(my_predictions)
# print('Test accuracy: %.4f%%' % test_accuracy)

with open('my_transfer.csv', 'w', newline='') as csvfile:
    result_writger = csv.writer(csvfile)
    result_writger.writerow(['Id', 'task_1', 'task_2'])
    for test_filepath, test_prediction in zip(test_files, my_predictions):
        result_writger.writerow([test_filepath, test_prediction[0][0], test_prediction[0][2]])

In [None]:
Result of ROC AUC
Category 1 Score: 0.759
Category 2 Score: 0.877
Category 3 Score: 0.818

### Cleaning up python object

In [15]:
del train_data, valid_data, test_data, transfer_model, my_model
gc.collect()

1177

## Network without transfer learning
### Loading images into tensors

In [16]:
train_tensors = paths_to_tensor(tqdm(train_files))
train_tensors = train_tensors.astype('float32') / 255

valid_tensors = paths_to_tensor(tqdm(valid_files))
valid_tensors = valid_tensors.astype('float32') / 255

test_tensors = paths_to_tensor(tqdm(test_files))
test_tensors = test_tensors.astype('float32') / 255

print(train_tensors.shape)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2000/2000 [04:14<00:00,  7.84it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 150/150 [00:32<00:00,  4.60it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 600/600 [03:22<00:00,  2.97it/s]


(2000, 384, 256, 3)


## Network Model 2 for CNN without transfer learning

In [26]:
from keras.layers import Conv2D, Dropout, Flatten, Dense, MaxPooling2D, GlobalAveragePooling2D
from keras.models import Sequential

my_model = Sequential()

my_model.add(Conv2D(filters=16, kernel_size=3, padding='same', activation='relu', 
                        input_shape=train_tensors.shape[1:]))
my_model.add(MaxPooling2D(pool_size=2))
my_model.add(Dropout(0.2))

my_model.add(Conv2D(filters=64, kernel_size=3, padding='same', activation='relu'))
my_model.add(MaxPooling2D(pool_size=2))
my_model.add(Dropout(0.2))

my_model.add(Conv2D(filters=256, kernel_size=3, padding='same', activation='relu'))
my_model.add(MaxPooling2D(pool_size=2))
my_model.add(Dropout(0.2))

my_model.add(Conv2D(filters=1024, kernel_size=3, padding='same', activation='relu'))
my_model.add(MaxPooling2D(pool_size=2))
my_model.add(Dropout(0.1))

my_model.add(Conv2D(filters=2048, kernel_size=3, padding='same', activation='relu'))
my_model.add(MaxPooling2D(pool_size=2))
my_model.add(Dropout(0.1))

# my_model.add(Conv2D(filters=512, kernel_size=3, padding='same', activation='relu'))
# my_model.add(MaxPooling2D(pool_size=2))
# my_model.add(Dropout(0.1))

# my_model.add(Conv2D(filters=1024, kernel_size=3, padding='same', activation='relu'))
# my_model.add(MaxPooling2D(pool_size=2))
# my_model.add(Dropout(0.1))

my_model.add(GlobalAveragePooling2D())

my_model.add(Dense(3, activation='softmax'))

my_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_211 (Conv2D)          (None, 384, 256, 16)      448       
_________________________________________________________________
max_pooling2d_12 (MaxPooling (None, 192, 128, 16)      0         
_________________________________________________________________
dropout_11 (Dropout)         (None, 192, 128, 16)      0         
_________________________________________________________________
conv2d_212 (Conv2D)          (None, 192, 128, 64)      9280      
_________________________________________________________________
max_pooling2d_13 (MaxPooling (None, 96, 64, 64)        0         
_________________________________________________________________
dropout_12 (Dropout)         (None, 96, 64, 64)        0         
_________________________________________________________________
conv2d_213 (Conv2D)          (None, 96, 64, 256)       147712    
__________

In [27]:
my_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [28]:
gc.collect()

11130

In [30]:
from keras.callbacks import ModelCheckpoint
import os


checkpoint_filepath = 'saved_models/weights.best.my.hdf5'

my_checkpointer = ModelCheckpoint(filepath=checkpoint_filepath,
                               verbose=1, save_best_only=True)

my_model.fit(train_tensors, train_targets, 
          validation_data=(valid_tensors, valid_targets),
          epochs=40, batch_size=100, callbacks=[my_checkpointer], verbose=1)

Train on 2000 samples, validate on 150 samples
Epoch 1/40

Epoch 00001: val_loss improved from inf to 0.83356, saving model to saved_models/weights.best.my.hdf5
Epoch 2/40

Epoch 00002: val_loss did not improve from 0.83356
Epoch 3/40

Epoch 00003: val_loss did not improve from 0.83356
Epoch 4/40

Epoch 00004: val_loss improved from 0.83356 to 0.81872, saving model to saved_models/weights.best.my.hdf5
Epoch 5/40

Epoch 00005: val_loss improved from 0.81872 to 0.81811, saving model to saved_models/weights.best.my.hdf5
Epoch 6/40

Epoch 00006: val_loss improved from 0.81811 to 0.79340, saving model to saved_models/weights.best.my.hdf5
Epoch 7/40

Epoch 00007: val_loss improved from 0.79340 to 0.78957, saving model to saved_models/weights.best.my.hdf5
Epoch 8/40

Epoch 00008: val_loss did not improve from 0.78957
Epoch 9/40



Epoch 00009: val_loss improved from 0.78957 to 0.78387, saving model to saved_models/weights.best.my.hdf5
Epoch 10/40

Epoch 00010: val_loss improved from 0.78387 to 0.77043, saving model to saved_models/weights.best.my.hdf5
Epoch 11/40

Epoch 00011: val_loss did not improve from 0.77043
Epoch 12/40

Epoch 00012: val_loss improved from 0.77043 to 0.76749, saving model to saved_models/weights.best.my.hdf5
Epoch 13/40

Epoch 00013: val_loss did not improve from 0.76749
Epoch 14/40

Epoch 00014: val_loss did not improve from 0.76749
Epoch 15/40

Epoch 00015: val_loss did not improve from 0.76749
Epoch 16/40

Epoch 00016: val_loss did not improve from 0.76749
Epoch 17/40



Epoch 00017: val_loss did not improve from 0.76749
Epoch 18/40

Epoch 00018: val_loss improved from 0.76749 to 0.76621, saving model to saved_models/weights.best.my.hdf5
Epoch 19/40

Epoch 00019: val_loss did not improve from 0.76621
Epoch 20/40

Epoch 00020: val_loss did not improve from 0.76621
Epoch 21/40

Epoch 00021: val_loss did not improve from 0.76621
Epoch 22/40

Epoch 00022: val_loss did not improve from 0.76621
Epoch 23/40

Epoch 00023: val_loss did not improve from 0.76621
Epoch 24/40

Epoch 00024: val_loss did not improve from 0.76621
Epoch 25/40



Epoch 00025: val_loss did not improve from 0.76621
Epoch 26/40

Epoch 00026: val_loss did not improve from 0.76621
Epoch 27/40

Epoch 00027: val_loss improved from 0.76621 to 0.73744, saving model to saved_models/weights.best.my.hdf5
Epoch 28/40

Epoch 00028: val_loss did not improve from 0.73744
Epoch 29/40

Epoch 00029: val_loss did not improve from 0.73744
Epoch 30/40

Epoch 00030: val_loss did not improve from 0.73744
Epoch 31/40

Epoch 00031: val_loss did not improve from 0.73744
Epoch 32/40

Epoch 00032: val_loss did not improve from 0.73744
Epoch 33/40

Epoch 00033: val_loss did not improve from 0.73744
Epoch 34/40



Epoch 00034: val_loss did not improve from 0.73744
Epoch 35/40

Epoch 00035: val_loss did not improve from 0.73744
Epoch 36/40

Epoch 00036: val_loss did not improve from 0.73744
Epoch 37/40

Epoch 00037: val_loss did not improve from 0.73744
Epoch 38/40

Epoch 00038: val_loss did not improve from 0.73744
Epoch 39/40

Epoch 00039: val_loss did not improve from 0.73744
Epoch 40/40

Epoch 00040: val_loss did not improve from 0.73744


<keras.callbacks.History at 0x292d31c6ef0>

In [31]:
my_model.load_weights(checkpoint_filepath)

In [32]:
import csv


my_predictions = [my_model.predict(np.expand_dims(feature, axis=0)) for feature in test_tensors]

# test_accuracy = 100 * np.sum(np.array(my_predictions)==np.argmax(test_targets, axis=1)) / len(my_predictions)
# print('Test accuracy: %.4f%%' % test_accuracy)

with open('my_cnn.csv', 'w', newline='') as csvfile:
    result_writger = csv.writer(csvfile)
    result_writger.writerow(['Id', 'task_1', 'task_2'])
    for test_filepath, test_prediction in zip(test_files, my_predictions):
        result_writger.writerow([test_filepath, test_prediction[0][0], test_prediction[0][2]])

In [33]:
my_predictions

[array([[0.0537998 , 0.3720544 , 0.57414573]], dtype=float32),
 array([[0.23505117, 0.40950906, 0.35543972]], dtype=float32),
 array([[0.25806636, 0.6062299 , 0.13570368]], dtype=float32),
 array([[0.4129069 , 0.5557064 , 0.03138682]], dtype=float32),
 array([[0.23806821, 0.68993616, 0.07199564]], dtype=float32),
 array([[0.14545488, 0.8195845 , 0.03496062]], dtype=float32),
 array([[0.17491573, 0.72190344, 0.10318081]], dtype=float32),
 array([[0.15154465, 0.5129464 , 0.335509  ]], dtype=float32),
 array([[0.2722508 , 0.68131775, 0.0464314 ]], dtype=float32),
 array([[0.4218841 , 0.53918225, 0.03893368]], dtype=float32),
 array([[0.24519944, 0.72761863, 0.02718199]], dtype=float32),
 array([[0.21201548, 0.51336855, 0.27461597]], dtype=float32),
 array([[0.18136051, 0.4665843 , 0.3520552 ]], dtype=float32),
 array([[0.16907156, 0.34839672, 0.4825317 ]], dtype=float32),
 array([[0.0588286 , 0.9238437 , 0.01732777]], dtype=float32),
 array([[0.2415644 , 0.7562038 , 0.00223186]], dtype=fl

In [None]:
Result of ROC AUC
Category 1 Score: 0.575
Category 2 Score: 0.849
Category 3 Score: 0.712