In [1]:
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras import optimizers
from keras.layers import Dense, Conv2D, Dropout, Flatten, MaxPooling2D
from keras.applications import InceptionV3

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
base_model= InceptionV3(include_top=False, weights="imagenet", input_shape=(299,299,3))

Instructions for updating:
Colocations handled automatically by placer.


In [21]:

model= Sequential()
model.add(base_model)
model.add(Conv2D(64, (3, 3), activation = 'relu'))
model.add(MaxPooling2D(pool_size = (2, 2)))
model.add(Dropout(0.40))
model.add(Flatten())
model.add(Dense(128,activation='relu'))
model.add(Dropout(0.4))
model.add(Dense(7, activation='softmax'))


In [22]:
data=pd.read_csv("skin-cancer-mnist-ham10000/HAM10000_metadata.csv")
data['image_full_name']=data['image_id']+'.jpg'
X=data[['image_full_name','dx','lesion_id']]

In [23]:
from sklearn.model_selection import train_test_split
Y=X.pop('dx').to_frame()
X_train, X_test, y_train, y_test   = train_test_split(X,Y, test_size=0.2, random_state=42)
X_train,X_val,y_train,y_val        =train_test_split(X_train, y_train, test_size=0.25, random_state=42)

In [24]:
train=pd.concat([X_train,y_train],axis=1)
val=pd.concat([X_val,y_val],axis=1)
test=pd.concat([X_test,y_test],axis=1)

In [25]:

from sklearn.preprocessing import LabelEncoder
encoder= LabelEncoder()
encoder.fit(val['dx'])
name_as_indexes_train=encoder.transform(val['dx']) 
val['label']=name_as_indexes_train


In [26]:
encoder=LabelEncoder()
encoder.fit(test['dx'])
name_as_indexes_test=encoder.transform(test['dx']) 
test['label']=name_as_indexes_test


In [27]:
from keras_preprocessing.image import ImageDataGenerator
train_generator = ImageDataGenerator(rescale = 1./255,
                                     rotation_range=10,  
                                     zoom_range = 0.1, 
                                     width_shift_range=0.0,  height_shift_range=0.0) 

In [28]:
train_data= train_generator.flow_from_dataframe(dataframe=train,x_col="image_full_name",y_col="dx",
                                                batch_size=64,directory="HAM1000_images/HAM1000_images",
                                                shuffle=True,class_mode="categorical",target_size=(299,299))

Found 6009 validated image filenames belonging to 7 classes.


In [29]:

test_generator=ImageDataGenerator(rescale = 1./255)

In [30]:
test_data= test_generator.flow_from_dataframe(dataframe=test,x_col="image_full_name",y_col="dx",
                                              directory="HAM1000_images/HAM1000_images",
                                              shuffle=False,batch_size=1,class_mode=None,target_size=(299,299))

Found 2003 validated image filenames.


In [31]:
val_data=test_generator.flow_from_dataframe(dataframe=val,x_col="image_full_name",y_col="dx",
                                            directory="HAM1000_images/HAM1000_images",
                                            batch_size=64,shuffle=False,class_mode="categorical",target_size=(299,299))

Found 2003 validated image filenames belonging to 7 classes.


In [32]:
from keras.callbacks import ReduceLROnPlateau
learning_control = ReduceLROnPlateau(monitor='val_acc', patience=3, verbose=1, factor=.5, min_lr=0.00001)

In [33]:
from keras.callbacks import ModelCheckpoint
# Save the model with best weights
checkpointer = ModelCheckpoint('best.hdf5', verbose=1,save_best_only=True)

In [34]:
model.compile(optimizer=optimizers.adam(lr=0.0001),loss="categorical_crossentropy",metrics=["accuracy"])
model.fit_generator(generator=train_data,
                            steps_per_epoch=train_data.samples//train_data.batch_size,
                            validation_data=val_data,
                            verbose=1,
                            validation_steps=val_data.samples//val_data.batch_size,
                            epochs=30,callbacks=[learning_control, checkpointer])

Epoch 1/30

Epoch 00001: val_loss improved from inf to 1.02800, saving model to best.hdf5
Epoch 2/30

Epoch 00002: val_loss did not improve from 1.02800
Epoch 3/30

Epoch 00003: val_loss improved from 1.02800 to 0.97649, saving model to best.hdf5
Epoch 4/30

Epoch 00004: val_loss improved from 0.97649 to 0.95081, saving model to best.hdf5
Epoch 5/30

Epoch 00005: val_loss did not improve from 0.95081
Epoch 6/30

Epoch 00006: val_loss did not improve from 0.95081
Epoch 7/30

Epoch 00007: val_loss did not improve from 0.95081
Epoch 8/30

Epoch 00008: ReduceLROnPlateau reducing learning rate to 4.999999873689376e-05.

Epoch 00008: val_loss did not improve from 0.95081
Epoch 9/30

Epoch 00009: val_loss did not improve from 0.95081
Epoch 10/30

Epoch 00010: val_loss did not improve from 0.95081
Epoch 11/30

Epoch 00011: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-05.

Epoch 00011: val_loss did not improve from 0.95081
Epoch 12/30

Epoch 00012: val_loss did not improve fro

<keras.callbacks.History at 0x7fb9e4716f60>

In [35]:
model.save('last.hdf5')

In [36]:
test_data.reset()
predictions = model.predict_generator(test_data, steps=test_data.samples/test_data.batch_size,verbose=1)
y_pred= np.argmax(predictions, axis=1)



In [18]:
from sklearn.metrics import confusion_matrix 
cm= confusion_matrix(name_as_indexes_test,y_pred)
print(cm)

[[  26    4   28    0   11    0    0]
 [   2   59   21    0    7    4    0]
 [   3    2  182    1   24   16    0]
 [   2    2    7   13    3    1    0]
 [   2    0   31    0  170   22    1]
 [   0    2   69    0   78 1186    3]
 [   0    0    0    0    0    1   20]]
