In [8]:

import os
import random
from glob import glob

import cv2
import matplotlib.pyplot as plt
import numpy as np

#from extract_bottleneck_features import *

from keras.applications.resnet50 import decode_predictions, preprocess_input, ResNet50
from keras.callbacks import ModelCheckpoint
from keras.layers import Conv2D, Dense, Dropout, Flatten, GlobalAveragePooling2D, MaxPooling2D, PReLU
from keras.models import Sequential
from keras.preprocessing import image
from keras.utils import np_utils

from sklearn.datasets import load_files

from PIL import ImageFile
from tqdm import tqdm

def extract_Xception(tensor):
    """Run `tensor` through the headless, ImageNet-pretrained Xception network.

    Args:
        tensor: batch of images; it is passed to Xception's own
            `preprocess_input` before prediction.

    Returns:
        Whatever `Xception(include_top=False).predict(...)` returns for the
        preprocessed batch (the bottleneck features).
    """
    # Imported locally so Xception's preprocess_input does not shadow the
    # ResNet50 preprocess_input imported at the top of the notebook.
    from keras.applications.xception import Xception, preprocess_input

    # Build the network (and load its weights) only once; the instance is
    # cached on the function object and reused by later calls.
    xception_model = getattr(extract_Xception, '_model', None)
    if xception_model is None:
        xception_model = Xception(weights='imagenet', include_top=False)
        extract_Xception._model = xception_model

    return xception_model.predict(preprocess_input(tensor))

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Marker printed once the setup cell has run to completion.
print("All revv'ed up and ready to go!")


All revv'ed up and ready to go!


In [2]:
# define function to load train, test, and validation datasets
NUM_CATEGORIES = 3

def load_dataset(path):
    """Index an image folder and return its file paths with one-hot labels.

    Args:
        path: directory handed to `sklearn.datasets.load_files`.

    Returns:
        (files, targets): `files` is an array of file paths and `targets`
        is the matching one-hot label matrix with NUM_CATEGORIES columns.
    """
    # Only the paths and labels are used; load_content=False stops
    # load_files from also reading every image's raw bytes into memory.
    data = load_files(path, load_content=False)
    files = np.array(data['filenames'])
    targets = np_utils.to_categorical(np.array(data['target']), NUM_CATEGORIES)
    return files, targets

# load the validation split
base_folder = 'data'
valid_files, valid_targets = load_dataset('{}/valid'.format(base_folder))

# load list of disease names: one per class sub-folder of the validation split.
# The name is the last path component (the glob pattern leaves a trailing "/"),
# so this keeps working when base_folder is nested more than one level deep.
disease_names = [
    item.rstrip("/").split("/")[-1]
    for item in sorted(glob("{}/valid/*/".format(base_folder)))
]


In [3]:
# Quick sanity check of the validation split: class names, the last five
# file paths with their one-hot targets, and the overall counts.
print(disease_names)

print(
    'Examples of file {} and target {}'.format(
        valid_files[-5:],
        valid_targets[-5:],
    )
)

print('There are %d total disease categories.' % len(disease_names))
print('There are %d validation disease images.' % len(valid_files))


['melanoma', 'nevus', 'seborrheic_keratosis']
Examples of file ['data/valid/melanoma/ISIC_0013644.jpg' 'data/valid/nevus/ISIC_0015443.jpg'
 'data/valid/nevus/ISIC_0012313.jpg'
 'data/valid/seborrheic_keratosis/ISIC_0012720.jpg'
 'data/valid/nevus/ISIC_0007332.jpg'] and target [[ 1.  0.  0.]
 [ 0.  1.  0.]
 [ 0.  1.  0.]
 [ 0.  0.  1.]
 [ 0.  1.  0.]]
There are 3 total disease categories.
There are 150 validation disease images.


In [4]:

# The train/test splits are loaded in their own cell: the framework is set up
# on a local machine and training happens on AWS, and the train/test archives
# (about 10GB zipped) are not downloaded locally to save space.
train_files, train_targets = load_dataset('{}/train'.format(base_folder))
test_files, test_targets = load_dataset('{}/test'.format(base_folder))

print('There are %d training disease images.' % len(train_files))
print('There are %d test disease images.'% len(test_files))

# Total across all three splits.
print(
    'There are %s total disease images.\n'
    % (len(train_files) + len(valid_files) + len(test_files))
)


There are 2000 training disease images.
There are 600 test disease images.
There are 2750 total disease images.



In [5]:

def path_to_tensor(img_path, target_size=(224, 224)):
    """Load one image file as a 4D tensor with a leading batch axis.

    Args:
        img_path: path of the image file to load.
        target_size: (height, width) the image is resized to on load.
            Defaults to (224, 224), the size used throughout this notebook.

    Returns:
        Array of shape (1, height, width, channels); pixel values are not
        rescaled here.
    """
    # loads RGB image as PIL.Image.Image type
    img = image.load_img(img_path, target_size=target_size)
    # convert PIL.Image.Image type to 3D tensor with shape (height, width, 3)
    x = image.img_to_array(img)
    # convert 3D tensor to 4D tensor with shape (1, height, width, 3) and return 4D tensor
    return np.expand_dims(x, axis=0)

def paths_to_tensor(img_paths, target_size=(224, 224)):
    """Load several image files and stack them into a single 4D tensor.

    Args:
        img_paths: iterable of image file paths (progress is shown via tqdm).
        target_size: (height, width) forwarded to `path_to_tensor`.

    Returns:
        Array of shape (len(img_paths), height, width, channels).
    """
    list_of_tensors = [
        path_to_tensor(img_path, target_size=target_size)
        for img_path in tqdm(img_paths)
    ]
    return np.vstack(list_of_tensors)


In [6]:
                           
# Let PIL load image files that are truncated instead of raising.
ImageFile.LOAD_TRUNCATED_IMAGES = True

# pre-process the data for Keras: load every split as float32 and scale the
# pixel values by 1/255 (splits are processed in train, valid, test order)
train_tensors, valid_tensors, test_tensors = (
    paths_to_tensor(split_files).astype('float32')/255
    for split_files in (train_files, valid_files, test_files)
)

print("All done, be sure to run this just once (per Jupyter notebook session)!")


100%|██████████| 2000/2000 [04:32<00:00,  7.33it/s]
100%|██████████| 150/150 [00:36<00:00,  4.69it/s]
100%|██████████| 600/600 [03:47<00:00,  2.44it/s]


All done, be sure to run this just once (per Jupyter notebook session)!


In [12]:
# Hyper-parameters shared by the layers of the from-scratch CNN.
# TODO: PReLU is reported (in a single paper) to beat ReLU. FIXME: work out how to use it here.
activation_func = 'relu'
# 'valid' padding for now, to match the suggested model size
padding_type = 'valid'
output_activation_func = 'softmax'
kernel_and_pool_size = 2

model = Sequential()

# Three Conv2D -> MaxPooling2D stages with 16, 32 and 64 filters.
# Only the first stage declares the input shape (taken from one training tensor).
for stage_index, filter_count in enumerate((16, 32, 64)):
    conv_options = dict(
        activation=activation_func,
        filters=filter_count,
        kernel_size=kernel_and_pool_size,
        padding=padding_type,
    )
    if stage_index == 0:
        conv_options['input_shape'] = train_tensors[0].shape
    model.add(Conv2D(**conv_options))
    model.add(MaxPooling2D(pool_size=kernel_and_pool_size))

model.add(Dropout(0.3))

# GlobalAveragePooling2D is used instead of Flatten. The Dense(500) + Dropout(0.4)
# head seen in cifar10_cnn is left out for now to follow the original suggestion.
model.add(GlobalAveragePooling2D())

# One output unit per disease class.
model.add(Dense(len(disease_names), activation=output_activation_func))

model.summary()

model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_6 (Conv2D)            (None, 223, 223, 16)      208       
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 111, 111, 16)      0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 110, 110, 32)      2080      
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 55, 55, 32)        0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 54, 54, 64)        8256      
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 27, 27, 64)        0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 27, 27, 64)        0         
__________

In [20]:

epochs = 100

# Make sure the checkpoint directory exists before training starts; otherwise
# the first checkpoint write can fail on a fresh checkout.
checkpoint_path = 'saved_models/weights.best.from_scratch.hdf5'
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

# save_best_only=True: only the best weights seen so far are kept on disk.
checkpointer = ModelCheckpoint(
    filepath = checkpoint_path
    , save_best_only = True
    , verbose = 1
)

model.fit(train_tensors, train_targets
          , batch_size = 20
          , callbacks = [checkpointer]
          , epochs = epochs
          , validation_data = (valid_tensors, valid_targets)
          , verbose = 1
         )


Train on 2000 samples, validate on 150 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100


Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100


Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100


Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.History at 0x7f0e81a8ce80>

In [21]:

# restore the best weights saved by the checkpoint callback during training
model.load_weights('saved_models/weights.best.from_scratch.hdf5')

# get index of the predicted disease class for each image in the test set,
# using one batched predict call instead of one predict call per image
disease_predictions = np.argmax(model.predict(test_tensors), axis=1)
# old name (copy-pasted from another project), kept in case other cells still use it
dog_breed_predictions = list(disease_predictions)

# report test accuracy: percentage of images whose predicted class index
# matches the index of the 1 in the one-hot target
test_accuracy = 100*np.sum(disease_predictions==np.argmax(test_targets, axis=1))/len(disease_predictions)
print('Test accuracy: %.4f%%' % test_accuracy)


Test accuracy: 64.0000%


In [22]:

def disease(image_path):
    """Predict the disease name for a single image file.

    Args:
        image_path: path of the image to classify.

    Returns:
        The entry of `disease_names` whose index has the highest model score.
    """
    # The model was fit on tensors cast to float32 and divided by 255, so the
    # same scaling has to be applied here; raw 0-255 pixels would be fed
    # to the network on a different scale than it was trained on.
    img_tensor = path_to_tensor(image_path).astype('float32') / 255
    index = np.argmax(model.predict(img_tensor))

    return disease_names[index]

# Classify one validation image from each of the three class folders.
print(
    disease('./data/valid/melanoma/ISIC_0012099.jpg'),
    disease('./data/valid/nevus/ISIC_0001769.jpg'),
    disease('./data/valid/seborrheic_keratosis/ISIC_0012143.jpg'),
)


nevus nevus nevus
