## Load Dataset

In [12]:
import numpy as np
from sklearn.datasets import load_files       
from keras.utils import np_utils
from glob import glob
import os

# define function to load train, test, and validation datasets
def load_dataset(path, num_classes=None):
    """Collect file paths and one-hot labels from a class-per-folder directory.

    Parameters
    ----------
    path : str
        Directory handed to ``sklearn.datasets.load_files``; each immediate
        sub-folder is treated as one class.
    num_classes : int, optional
        Width of the one-hot encoding. When omitted, ``to_categorical``
        infers it from the largest label index found under ``path``.

    Returns
    -------
    files : numpy.ndarray
        File paths as reported by ``load_files``.
    targets : numpy.ndarray
        One-hot encoded labels, one row per entry of ``files``.
    """
    # Only the paths and labels are used, so do not read every file's bytes into memory.
    data = load_files(path, load_content=False)
    files = np.array(data['filenames'])
    targets = np_utils.to_categorical(np.array(data['target']), num_classes=num_classes)
    return files, targets

# load train, test, and validation datasets
train_files, train_targets = load_dataset('data/train')
valid_files, valid_targets = load_dataset('data/valid')
test_files, test_targets = load_dataset('data/test')

# load list of lesion types: one sub-folder of data/train per type.
# Take the folder's own name instead of slicing by a hard-coded prefix length,
# so the list does not silently break if the data directory is renamed.
lesion_types = [os.path.basename(os.path.normpath(item)) for item in sorted(glob("data/train/*/"))]

# every split must be one-hot encoded over the same set of lesion types
assert train_targets.shape[1] == valid_targets.shape[1] == test_targets.shape[1] == len(lesion_types), \
    "train/valid/test targets disagree on the number of lesion types"

# print statistics about the dataset
n_total_images = len(train_files) + len(valid_files) + len(test_files)
print('There are %d total lesion types.' % len(lesion_types))
print('There are %d total lesion images.\n' % n_total_images)
print('There are %d training lesion images.' % len(train_files))
print('There are %d validation lesion images.' % len(valid_files))
print('There are %d test lesion images.'% len(test_files))

There are 3 total lesion types.
There are 450 total lesion images.

There are 150 training lesion images.
There are 150 validation lesion images.
There are 150 test lesion images.


## Define Utilities

In [13]:
from keras.preprocessing import image                  
from tqdm import tqdm_notebook as tqdm

def path_to_tensor(img_path):
    """Read one image file and return it as a 4D array with a batch axis of size 1.

    The image is loaded at a target size of 224x224, converted to a 3D array
    (224, 224, 3), and given a new leading axis so the result is (1, 224, 224, 3).
    """
    pil_image = image.load_img(img_path, target_size=(224, 224))
    pixels = image.img_to_array(pil_image)
    # prepend the batch axis
    return pixels[np.newaxis, ...]

def paths_to_tensor(img_paths):
    """Convert several image paths with ``path_to_tensor`` and stack the results.

    A tqdm progress bar wraps the iteration over ``img_paths``; the per-image
    tensors are joined along the first axis with ``np.vstack``.
    """
    batch = []
    for single_path in tqdm(img_paths):
        batch.append(path_to_tensor(single_path))
    return np.vstack(batch)

def extract_VGG16(tensor):
    """Run ``tensor`` through the convolutional part of VGG16 and return its output.

    The tensor is first passed through VGG16's ``preprocess_input`` and then fed
    to a VGG16 network built with ImageNet weights and ``include_top=False``.

    The network is built on the first call only and cached on the function
    object, so repeated calls (e.g. one per image) do not rebuild the model and
    reload its weights every time.
    """
    from keras.applications.vgg16 import VGG16, preprocess_input

    model = getattr(extract_VGG16, "_cached_model", None)
    if model is None:
        model = VGG16(weights='imagenet', include_top=False)
        extract_VGG16._cached_model = model
    return model.predict(preprocess_input(tensor))

## Preprocess Tensors

In [3]:
from PIL import ImageFile

# allow PIL to load image files that are truncated
ImageFile.LOAD_TRUNCATED_IMAGES = True

# pre-process the data for Keras: load each split as float32 and divide by 255
train_tensors, valid_tensors, test_tensors = (
    paths_to_tensor(split_files).astype('float32') / 255
    for split_files in (train_files, valid_files, test_files)
)

HBox(children=(IntProgress(value=0, max=150), HTML(value='')))




HBox(children=(IntProgress(value=0, max=150), HTML(value='')))




HBox(children=(IntProgress(value=0, max=150), HTML(value='')))




## Define Model Architecture - From Scratch

In [37]:
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Dropout, Flatten, Dense
from keras.models import Sequential

# Small CNN trained from scratch: four 2x2 'same'-padded conv layers with
# 16 -> 32 -> 64 -> 128 filters, a max-pool between consecutive convs,
# then global average pooling and a two-layer dense head.
scratch_model = Sequential()

# first conv layer fixes the input shape
scratch_model.add(Conv2D(filters=16, kernel_size=2, padding='same', activation='relu',
                         input_shape=(224, 224, 3)))

# each remaining conv layer is preceded by a 2x2 max-pool
for n_filters in (32, 64, 128):
    scratch_model.add(MaxPooling2D(pool_size=2))
    scratch_model.add(Conv2D(filters=n_filters, kernel_size=2, padding='same', activation='relu'))

# classification head: 64 hidden units, 3-way softmax output
scratch_model.add(GlobalAveragePooling2D())
scratch_model.add(Dense(64, activation='relu'))
scratch_model.add(Dense(3, activation='softmax'))

scratch_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_21 (Conv2D)           (None, 224, 224, 16)      208       
_________________________________________________________________
max_pooling2d_15 (MaxPooling (None, 112, 112, 16)      0         
_________________________________________________________________
conv2d_22 (Conv2D)           (None, 112, 112, 32)      2080      
_________________________________________________________________
max_pooling2d_16 (MaxPooling (None, 56, 56, 32)        0         
_________________________________________________________________
conv2d_23 (Conv2D)           (None, 56, 56, 64)        8256      
_________________________________________________________________
max_pooling2d_17 (MaxPooling (None, 28, 28, 64)        0         
_________________________________________________________________
conv2d_24 (Conv2D)           (None, 28, 28, 128)       32896     
__________

In [38]:
# Configure training for the from-scratch model: categorical cross-entropy loss,
# the Adam optimizer, and accuracy reported as a metric.
scratch_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [40]:
from keras.callbacks import ModelCheckpoint  

n_epochs = 20

# the checkpoint directory must exist before Keras writes into it
os.makedirs("saved_models", exist_ok=True)

# save_best_only=True: the weights file is only rewritten when the monitored
# quantity (the ModelCheckpoint default, since none is passed) improves
checkpointer = ModelCheckpoint(
    filepath='saved_models/scratch.weights.best.hdf5',
    save_best_only=True,
    verbose=1,
)

# train on the training tensors, validating on the validation split each epoch
scratch_model.fit(
    train_tensors,
    train_targets,
    batch_size=20,
    epochs=n_epochs,
    validation_data=(valid_tensors, valid_targets),
    callbacks=[checkpointer],
    verbose=1,
)

Train on 150 samples, validate on 150 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x1a50bc2400>

In [43]:
# Restore the weights written by the ModelCheckpoint callback (created with
# save_best_only=True) so evaluation does not use the last-epoch weights.
scratch_model.load_weights('saved_models/scratch.weights.best.hdf5')

In [45]:
# get index of predicted lesion type for each image in test set
# (a single batched predict call instead of one predict call per image)
lesion_predictions = list(np.argmax(scratch_model.predict(test_tensors), axis=1))

# report test accuracy: percentage of images whose predicted index matches the
# index of the 1 in the one-hot target row
true_labels = np.argmax(test_targets, axis=1)
test_accuracy = 100*np.sum(np.array(lesion_predictions)==true_labels)/len(lesion_predictions)
print('Test accuracy: %.4f%%' % test_accuracy)

Test accuracy: 54.0000%


## Create Bottleneck Features

In [4]:
# create bottleneck feature
bottleneck_features = dict()

# Each split is loaded as one batch and pushed through extract_VGG16 in a single
# call, rather than calling extract_VGG16 once per image.
for split_key, split_label, split_files in (
    ('train', 'Train', train_files),
    ('valid', 'Valid', valid_files),
    ('test', 'Test', test_files),
):
    print("Preprocessing bottleneck features - %s" % split_label)
    bottleneck_features[split_key] = extract_VGG16(paths_to_tensor(split_files))

# store to file by savez (np.savez appends the .npz extension to the file name)
os.makedirs("bottleneck_features", exist_ok=True)
np.savez('bottleneck_features/LesionVGG16Data',
         train=bottleneck_features['train'],
         valid=bottleneck_features['valid'],
         test=bottleneck_features['test'])

Preprocessing bottleneck features - Train


HBox(children=(IntProgress(value=0, max=150), HTML(value='')))


Preprocessing bottleneck features - Valid
Preprocessing bottleneck features - Test


## Load Bottleneck Features

In [3]:
# Read the arrays inside a `with` block so the .npz archive's file handle is
# closed once they are in memory; keep them in a plain dict under the same name.
with np.load('bottleneck_features/LesionVGG16Data.npz') as archive:
    bottleneck_features = {split: archive[split] for split in ('train', 'valid', 'test')}

train_VGG16 = bottleneck_features['train']
valid_VGG16 = bottleneck_features['valid']
test_VGG16 = bottleneck_features['test']

## Define Model Architecture - Pretrained

In [4]:
from keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import Dropout, Flatten, Dense
from keras.models import Sequential

# Classifier head trained on the stored VGG16 bottleneck features:
# flatten each feature map, one hidden layer of 64 units, 3-way softmax output.
VGG16_model = Sequential([
    Flatten(input_shape=train_VGG16.shape[1:]),
    Dense(64, activation='relu'),
    Dense(3, activation='softmax'),
])

VGG16_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 25088)             0         
_________________________________________________________________
dense_1 (Dense)              (None, 64)                1605696   
_________________________________________________________________
dense_2 (Dense)              (None, 3)                 195       
Total params: 1,605,891
Trainable params: 1,605,891
Non-trainable params: 0
_________________________________________________________________


In [5]:
# Configure training for the bottleneck-feature classifier: categorical
# cross-entropy loss, the Adam optimizer, and accuracy reported as a metric.
VGG16_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [6]:
from keras.callbacks import ModelCheckpoint  

n_epochs = 20

# the checkpoint directory must exist before Keras writes into it
os.makedirs("saved_models", exist_ok=True)

# save_best_only=True: the weights file is only rewritten when the monitored
# quantity (the ModelCheckpoint default, since none is passed) improves
checkpointer = ModelCheckpoint(
    filepath='saved_models/VGG16.weights.best.hdf5',
    save_best_only=True,
    verbose=1,
)

# train on the VGG16 bottleneck features, validating on the validation split each epoch
VGG16_model.fit(
    train_VGG16,
    train_targets,
    batch_size=20,
    epochs=n_epochs,
    validation_data=(valid_VGG16, valid_targets),
    callbacks=[checkpointer],
    verbose=1,
)

Train on 150 samples, validate on 150 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x1a21fa3828>

In [7]:
# Restore the weights written by the ModelCheckpoint callback (created with
# save_best_only=True) so evaluation does not use the last-epoch weights.
VGG16_model.load_weights('saved_models/VGG16.weights.best.hdf5')

In [9]:
# get index of predicted lesion type for each image in test set
# (a single batched predict call instead of one predict call per feature map)
lesion_predictions = list(np.argmax(VGG16_model.predict(test_VGG16), axis=1))

# report test accuracy: percentage of images whose predicted index matches the
# index of the 1 in the one-hot target row
true_labels = np.argmax(test_targets, axis=1)
test_accuracy = 100*np.sum(np.array(lesion_predictions)==true_labels)/len(lesion_predictions)
print('Test accuracy: %.4f%%' % test_accuracy)

Test accuracy: 88.6667%
