In [1]:
from keras import applications
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from keras import optimizers
from keras.models import Sequential, Model 
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D
from keras import backend as k 
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping

import pandas as pd
import numpy as np
import shutil
import os

Using TensorFlow backend.


In [8]:
DATA_DIR = 'Data'
TRAIN_DIR = 'train'
VAL_DIR = 'val'
# Share of the images (in CSV order) copied to the training split; the rest go to validation.
TRAIN_FRACTION = 0.9

trainingData = pd.read_csv(os.path.join(DATA_DIR, 'ISIC-2017_Training_Part3_GroundTruth.csv'))
x = []  # not used in this cell; kept in case a later cell relies on it
labels = trainingData.melanoma

# Keep one row per image id so that every id is paired with the label from its own row,
# rather than with whatever row happens to sit at the same position in the CSV.
uniqueRows = trainingData.drop_duplicates(subset='image_id')
totalImages = uniqueRows.shape[0]

# Copy every image into <DATA_DIR>/<train|val>/<0|1>/, where the class folder is '1' for
# melanoma and '0' otherwise.
for i, (img, label) in enumerate(zip(uniqueRows.image_id, uniqueRows.melanoma)):
    inPath = os.path.join(DATA_DIR, 'ISIC-2017_Training_Data', img + '.jpg')
    isMel = '1' if label == 1 else '0'
    splitDir = TRAIN_DIR if i < totalImages * TRAIN_FRACTION else VAL_DIR
    outDir = os.path.join(DATA_DIR, splitDir, isMel)
    # shutil.copy does not create missing parent folders, so make them on first use.
    if not os.path.isdir(outDir):
        os.makedirs(outDir)
    outpath = os.path.join(outDir, img + '.jpg')
    shutil.copy(inPath, outpath)


In [3]:
# Boundary of the train/validation split: images whose position is below this value go to train.
0.9 * totalImages

1800.0

In [5]:
batch_size = 25
img_width = 224
img_height = 224

# Random augmentations applied to training images only.
augmentation_options = dict(
    rotation_range=95,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    width_shift_range=0.2,
    height_shift_range=0.2,
)
train_datagen = ImageDataGenerator(**augmentation_options)

# Validation images are passed through without augmentation.
test_datagen = ImageDataGenerator()

# Settings shared by both directory iterators: resize target, batch size, and binary labels.
flow_options = dict(
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='binary',
)

train_generator = train_datagen.flow_from_directory(
    os.path.join(DATA_DIR, TRAIN_DIR), **flow_options)

validation_generator = test_datagen.flow_from_directory(
    os.path.join(DATA_DIR, VAL_DIR), **flow_options)

Found 1800 images belonging to 2 classes.
Found 200 images belonging to 2 classes.


In [6]:
# Load the ImageNet-pretrained VGG16 network, including its fully connected head.
full_vgg16 = applications.vgg16.VGG16(weights='imagenet', input_shape=(img_height, img_width, 3))

# Popping entries off `model.layers` only edits the layer list; the model's output tensor
# is still the final softmax, so predictions would not be bottleneck features.
# Build an explicit model that ends at 'fc1' instead, which is the layer left last once
# the final two layers ('fc2' and 'predictions') are dropped.
model = Model(inputs=full_vgg16.input, outputs=full_vgg16.get_layer('fc1').output)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
__________

In [10]:
# Plan: set up a script with a small dataset to make sure everything is working. Once that is set up, we will let it run
# for all the images and create the bottleneck feature values.


In [None]:
batch_size = 25
img_width = 224
img_height = 224

# No augmentation here: each image is fed through the network once, unchanged.
train_datagen = ImageDataGenerator()
test_datagen = ImageDataGenerator()

# class_mode=None makes the generators yield images only (no labels), and shuffle=False
# keeps a fixed order so the saved feature rows can be matched back to the files.
train_generator = train_datagen.flow_from_directory(
        os.path.join(DATA_DIR, TRAIN_DIR),
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode=None,
        shuffle=False)

validation_generator = test_datagen.flow_from_directory(
        os.path.join(DATA_DIR, VAL_DIR),
        target_size=(img_height, img_width),
        batch_size=batch_size,
        class_mode=None,
        shuffle=False)

# Number of batches needed to see every image exactly once (ceiling division), derived
# from the generators instead of being hard-coded for one particular dataset size.
train_steps = (train_generator.samples + batch_size - 1) // batch_size
validation_steps = (validation_generator.samples + batch_size - 1) // batch_size

train_features_path = os.path.join('BottleneckData', 'train', 'bottleneck_features_train.npy')
validation_features_path = os.path.join('BottleneckData', 'val', 'bottleneck_features_validation.npy')

# Make sure the output folders exist before writing.
for features_path in (train_features_path, validation_features_path):
    features_dir = os.path.dirname(features_path)
    if not os.path.isdir(features_dir):
        os.makedirs(features_dir)

# Passing the path lets np.save open the file in binary mode and close it afterwards;
# the previous text-mode handles were never closed.
bottleneck_features_train = model.predict_generator(train_generator, train_steps)
np.save(train_features_path, bottleneck_features_train)
bottleneck_features_validation = model.predict_generator(validation_generator, validation_steps)
np.save(validation_features_path, bottleneck_features_validation)

Found 1800 images belonging to 2 classes.
Found 200 images belonging to 2 classes.


In [23]:
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_4 (InputLayer)             (None, 299, 299, 3)   0                                            
____________________________________________________________________________________________________
conv2d_193 (Conv2D)              (None, 149, 149, 32)  864         input_4[0][0]                    
____________________________________________________________________________________________________
batch_normalization_193 (BatchNo (None, 149, 149, 32)  96          conv2d_193[0][0]                 
____________________________________________________________________________________________________
activation_189 (Activation)      (None, 149, 149, 32)  0           batch_normalization_193[0][0]    
___________________________________________________________________________________________