In [1]:
#Load the modules
from glob import glob 
import numpy as np
import pandas as pd
import keras
import cv2,os
import shutil

from keras.preprocessing.image import ImageDataGenerator


import matplotlib.pyplot as plt

from sklearn.utils import shuffle
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix,roc_curve,auc


Using TensorFlow backend.


In [2]:
# Locate the competition data. The default below is the original author's
# machine; set the HISTO_INPUT_DIR environment variable to run the notebook
# elsewhere without editing the code.
#INPUT_DIR = "/media/korhan/41923BB6020712E3/kaggle/histopathologic-cancer-detection/" #adapt this path, when running locally
INPUT_DIR = os.environ.get("HISTO_INPUT_DIR", "/home/korhan/Desktop/kaggle_histo/")
# The trailing '' component keeps the final path separator on training_dir.
training_dir = os.path.join(INPUT_DIR, 'train', '')

# One row per training image found on disk.
df = pd.DataFrame({'path': glob(os.path.join(training_dir, '*.tif'))})

# The image id is the file name up to its first dot; os.path.basename strips
# the directory using the platform's own separator instead of assuming '/'.
df['id'] = df.path.map(lambda p: os.path.basename(p).split(".")[0])
labels = pd.read_csv(os.path.join(INPUT_DIR, "train_labels.csv"))  # provided labels
df = df.merge(labels, on="id")  # attach the label to each file path via the shared id
# Labels are cast to str; presumably because flow_from_dataframe with
# class_mode='binary' expects string class labels -- confirm for the installed
# keras_preprocessing version.
df['label'] = df['label'].astype(str)
print(df.dtypes)

path     object
id       object
label    object
dtype: object


In [3]:
# Keep 80% of the labelled images for training and the rest for validation,
# stratified on the label so both parts have the same class proportions.
# random_state pins the shuffle: without it every kernel restart produces a
# different split, so a model saved in one session and reloaded in another
# could be validated on images it had already been trained on.
train_df, val_df = train_test_split(
    df,
    train_size=0.8,
    stratify=df['label'],
    random_state=42,
)

train_df.head(3)




Unnamed: 0,path,id,label
102559,/home/korhan/Desktop/kaggle_histo/train/8fc2c2...,8fc2c2f918e88573c74691a531e75604507b1ef1,0
154366,/home/korhan/Desktop/kaggle_histo/train/79a686...,79a68685684bcbd218b587deb4512cc5519add61,1
58930,/home/korhan/Desktop/kaggle_histo/train/8f5964...,8f596438b6703408196e42a060a2100fc961ebdf,0


In [4]:

def random_crop(img, random_crop_size):
    """Cut a randomly positioned window out of a channels-last image.

    Args:
        img: array indexed as (row, column, channel); it must have exactly
            3 channels.
        random_crop_size: ``(rows, columns)`` of the window to extract.

    Returns:
        The slice of ``img`` covering the chosen window, all channels kept.
    """
    assert img.shape[2] == 3
    rows, cols = img.shape[:2]
    crop_rows, crop_cols = random_crop_size
    # randint's upper bound is exclusive, hence the +1: the window may sit
    # flush against the far edge. The column offset is drawn first.
    left = np.random.randint(0, cols - crop_cols + 1)
    top = np.random.randint(0, rows - crop_rows + 1)
    bottom, right = top + crop_rows, left + crop_cols
    return img[top:bottom, left:right, :]

def center_crop(img, crop_size):
    """Cut the centered window of the requested size out of a channels-last image.

    Args:
        img: array indexed as (row, column, channel); it must have exactly
            3 channels.
        crop_size: ``(rows, columns)`` of the window to extract.

    Returns:
        The slice of ``img`` covering the centered window, all channels kept.
        When the margin is odd, the extra pixel is left on the bottom/right.
    """
    assert img.shape[2] == 3
    rows, cols = img.shape[:2]
    crop_rows, crop_cols = crop_size
    # Half of the leftover margin on each axis, truncated to an integer.
    top = int((rows - crop_rows) / 2)
    left = int((cols - crop_cols) / 2)
    return img[top:top + crop_rows, left:left + crop_cols, :]


def crop_generator(batches, crop_length):
    """Wrap a batch iterator so that every image is center-cropped to a square.

    Args:
        batches: iterator yielding ``(batch_x, batch_y)`` pairs (e.g. a Keras
            image iterator); ``batch_x`` is indexed as
            (sample, row, column, channel).
        crop_length: side length of the square crop.

    Yields:
        ``(crops, batch_y)`` where ``crops`` is a newly allocated array of
        shape ``(n_samples, crop_length, crop_length, 3)`` holding the
        centered crop of each image, and ``batch_y`` is passed through
        untouched.
    """
    window = (crop_length, crop_length)
    while True:
        images, targets = next(batches)
        n_images = images.shape[0]
        # np.zeros without a dtype allocates float64, whatever the input dtype.
        crops = np.zeros((n_images, crop_length, crop_length, 3))
        for idx in range(n_images):
            crops[idx] = center_crop(images[idx], window)
        yield (crops, targets)

def crop_generator2(batches, d):
    """Wrap a batch iterator, keeping only rows and columns ``d`` to ``2*d``.

    Args:
        batches: iterator yielding ``(batch_x, batch_y)`` pairs; ``batch_x``
            is indexed as (sample, row, column, channel).
        d: offset and side length of the square window; both bounds are
            truncated to integers before slicing.

    Yields:
        ``(batch_x[:, d:2*d, d:2*d, :], batch_y)`` for every incoming batch.
    """
    while True:
        images, targets = next(batches)
        # The same window is applied to both spatial axes.
        window = slice(int(d), int(2 * d))
        yield (images[:, window, window, :], targets)


In [5]:
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
# Session options, built in one expression: let TensorFlow grow its GPU memory
# on demand, and log the device every operation is placed on.
config = tf.ConfigProto(
    gpu_options=tf.GPUOptions(allow_growth=True),
    log_device_placement=True,
)
sess = tf.Session(config=config)
set_session(sess)  # make this session the default one used by Keras


In [6]:
crop_len = 32
batch_size = 256

# Images are loaded at three times the crop length per side.
image_side = 3 * crop_len

# Options shared by the training and the validation iterator.
flow_options = dict(
    x_col='path',
    y_col='label',
    target_size=(image_side, image_side),
    batch_size=batch_size,
    class_mode='binary',
)

# Training images: pixel values multiplied by 1/255 plus random augmentation.
# NOTE(review): ImageDataGenerator appears to apply `rescale` after the random
# transforms, so channel_shift_range=0.1 presumably acts on the un-rescaled
# pixel values -- confirm this magnitude is intended.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    zoom_range=0.2,
    # rotation_range=20
    channel_shift_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.05,
    horizontal_flip=True,
    vertical_flip=True,
)
train_batches = train_datagen.flow_from_dataframe(train_df, **flow_options)

# Validation images: only the 1/255 scaling, no augmentation.
val_datagen = ImageDataGenerator(rescale=1./255)
val_batches = val_datagen.flow_from_dataframe(val_df, **flow_options)


#train_crops = crop_generator(train_batches, crop_len)
#val_crops = crop_generator(val_batches, crop_len)


Found 176020 images belonging to 2 classes.
Found 44005 images belonging to 2 classes.


In [7]:
# Sanity check: draw one batch from the training iterator and add up its
# labels to see how many samples in the batch belong to class 1.
# Note that this consumes one batch from `train_batches`.
batch_x, batch_y = next(train_batches)
sum(batch_y)

106.0

In [8]:
#-------Callbacks-------------#
from keras.callbacks import ModelCheckpoint,EarlyStopping,TensorBoard,CSVLogger
from keras.callbacks import ReduceLROnPlateau,LearningRateScheduler

def step_decay(epoch):
    """Step-wise learning-rate schedule (intended for ``LearningRateScheduler``).

    The rate starts at 0.1 and is multiplied by 0.6 once for every full
    group of 3 in ``epoch + 1``, i.e.
    ``0.1 * 0.6 ** floor((1 + epoch) / 3)``.

    Args:
        epoch: epoch index passed in by the scheduler.

    Returns:
        The learning rate to use for that epoch, as a float.
    """
    # Imported locally: the notebook never imports `math` at the top level,
    # so without this the function raises NameError on its first call.
    import math

    initial_lrate = 0.1
    drop = 0.6
    epochs_drop = 3.0
    completed_drops = math.floor((1 + epoch) / epochs_drop)
    return initial_lrate * math.pow(drop, completed_drops)

# Save the full model (not just the weights) whenever the validation loss
# reaches a new minimum.
checkpoint = ModelCheckpoint(
    filepath='../models/base.model',
    monitor='val_loss', mode='min',
    save_best_only=True, save_weights_only=False,
    period=1,
    verbose=1,
)

# Stop once the validation loss has not improved by at least 0.001 for
# 10 epochs.
# NOTE(review): this object is built here, but the `callbacks` list defined
# further down in this cell does not include it -- confirm that is intended.
earlystop = EarlyStopping(
    monitor='val_loss', mode='auto',
    min_delta=0.001, patience=10,
    verbose=1,
)

# Per-epoch metrics as CSV; the file is overwritten on every run.
csvlogger = CSVLogger(filename="training_csv.log", separator=",", append=False)

#lrsched = LearningRateScheduler(step_decay,verbose=1)

# Multiply the learning rate by 0.4 after 2 epochs without improvement of the
# validation loss, then wait 1 epoch before monitoring resumes.
reduce = ReduceLROnPlateau(
    monitor='val_loss', mode='auto',
    factor=0.4, patience=2, cooldown=1,
    verbose=1,
)

import datetime

# Each run gets its own TensorBoard directory, named after its start time
# followed by a fixed suffix.
time_x = datetime.datetime.now()
run_stamp = time_x.strftime('%b%d_%H-%M-%S_')
log_name = run_stamp + '_03_512_512'

tensorboard = TensorBoard(
    log_dir='../logs/run_' + log_name,
    batch_size=batch_size,
    histogram_freq=0,
    write_images=False,
)

# Callbacks handed to training.
callbacks = [checkpoint, tensorboard, csvlogger, reduce]
#callbacks = [checkpoint,tensorboard]

In [9]:
from keras.applications.resnet50 import ResNet50
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras import backend as K

# Pre-trained feature extractor: ResNet50 with ImageNet weights, without its
# classification top, and with global average pooling on the output. It takes
# crop_len x crop_len RGB inputs.
base_model = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(crop_len, crop_len, 3),
    pooling='avg',
)
base_model.summary()


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 32, 32, 3)    0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 38, 38, 3)    0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 16, 16, 64)   9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 16, 16, 64)   256         conv1[0][0]                      
__________________________________________________________________________________________________
activation

In [10]:
from keras.layers import Dense, Activation, Flatten, Dropout, MaxPool2D, Cropping2D
from keras.models import Sequential, Model, Input

def build_finetune_model(base_model, dropout, fc_layers, input_shape, crop_len, num_classes, trainable=False):
    """Put a cropping layer in front of ``base_model`` and a dense head behind it.

    Args:
        base_model: model applied to the cropped input; its output feeds the
            dense head directly.
        dropout: rate of the Dropout layer that follows each hidden Dense layer.
        fc_layers: iterable with the unit count of each hidden Dense layer.
        input_shape: shape of one input sample, without the batch axis.
        crop_len: number of pixels Cropping2D removes from every edge of the
            input.
        num_classes: accepted but not used; the head always ends in a single
            sigmoid unit.
        trainable: when False, every layer of ``base_model`` is frozen. When
            True, the layers' trainable flags are left as they are.

    Returns:
        A Model mapping the full-size input to a single sigmoid output.
    """
    if not trainable:
        # Freeze the base so that only the new head gets trained.
        for base_layer in base_model.layers:
            base_layer.trainable = False

    inputs = Input(shape=input_shape)

    # An integer `cropping` trims that many pixels from top, bottom, left and
    # right, so only the central region reaches the base model.
    cropped = Cropping2D(cropping=crop_len, data_format=None)(inputs)
    features = base_model(cropped)

    # Randomly initialised fully connected layers, each followed by dropout.
    for units in fc_layers:
        features = Dense(units, activation='relu')(features)
        features = Dropout(dropout)(features)

    # Single sigmoid unit for the binary prediction.
    predictions = Dense(1, activation='sigmoid')(features)

    return Model(inputs=inputs, outputs=predictions)

# Unit counts of the fully connected layers in the new head, and the dropout
# rate applied after each of them.
FC_LAYERS = [512, 512]
dropout = 0.3

# The model receives the full image delivered by the data iterators, which is
# 3 * crop_len pixels per side. build_finetune_model then trims crop_len
# pixels from every edge, leaving the central crop_len x crop_len patch that
# base_model was built for. Deriving the input side from crop_len (rather
# than hard-coding 96) keeps these three places in agreement when crop_len
# changes.
model_input_side = 3 * crop_len

model = build_finetune_model(base_model,
                             dropout=dropout,
                             fc_layers=FC_LAYERS,
                             input_shape=(model_input_side, model_input_side, 3),
                             crop_len=crop_len,
                             num_classes=2,
                             trainable=True)

In [11]:
# Disabled code: it would load a previously saved model from
# '../models/base_resnet.model' and mark every layer as trainable.
# It is wrapped in a string literal, so nothing is executed; the cell merely
# evaluates to that string.
'''from keras.models import load_model
model = load_model('../models/base_resnet.model')
for layer in model.layers:
    layer.trainable = True'''

"from keras.models import load_model\nmodel = load_model('../models/base_resnet.model')\nfor layer in model.layers:\n    layer.trainable = True"

In [12]:
from keras.optimizers import Adam
# Compile only once the layers' trainable flags are final (i.e. *after*
# setting layers to non-trainable). Binary cross-entropy matches the single
# sigmoid output; Adam starts with a learning rate of 0.001.
model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(0.001),
    metrics=['accuracy'],
)
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 96, 96, 3)         0         
_________________________________________________________________
cropping2d_1 (Cropping2D)    (None, 32, 32, 3)         0         
_________________________________________________________________
resnet50 (Model)             (None, 2048)              23587712  
_________________________________________________________________
dense_1 (Dense)              (None, 1024)              2098176   
_________________________________________________________________
dropout_1 (Dropout)          (None, 1024)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 512)               524800    
_________________________________________________________________
dropout_2 (Dropout)          (None, 512)               0         
__________

In [13]:
# Disabled code: a copy of the TensorFlow session setup (GPU memory growth and
# device-placement logging) wrapped in a string literal. Nothing here is
# executed; the cell merely evaluates to that string.
'''import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
config = tf.ConfigProto()
config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
config.log_device_placement = True  # to log device placement (on which device the operation ran)
sess = tf.Session(config=config)
set_session(sess)  # set this TensorFlow session as the default session for Keras
'''

'import tensorflow as tf\nfrom keras.backend.tensorflow_backend import set_session\nconfig = tf.ConfigProto()\nconfig.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU\nconfig.log_device_placement = True  # to log device placement (on which device the operation ran)\nsess = tf.Session(config=config)\nset_session(sess)  # set this TensorFlow session as the default session for Keras\n'

In [14]:
# Train on the augmented iterator and validate on the un-augmented one.
# An "epoch" here is 500 training batches followed by 100 validation batches,
# not a full pass over the data. Class 1 is weighted more heavily than
# class 0 in the loss.
history = model.fit_generator(
    train_batches,                  # alternative: train_crops
    steps_per_epoch=500,
    epochs=70,
    validation_data=val_batches,    # alternative: val_crops
    validation_steps=100,
    class_weight={0: .83, 1: 1.25},
    callbacks=callbacks,
    workers=6,
    use_multiprocessing=True,
    verbose=2,
)

Epoch 1/70
 - 114s - loss: 0.5050 - acc: 0.7800 - val_loss: 0.6861 - val_acc: 0.7375

Epoch 00001: val_loss improved from inf to 0.68606, saving model to ../models/base.model
Epoch 2/70
 - 105s - loss: 0.4411 - acc: 0.8084 - val_loss: 0.9874 - val_acc: 0.6141

Epoch 00002: val_loss did not improve from 0.68606
Epoch 3/70
 - 105s - loss: 0.4110 - acc: 0.8225 - val_loss: 0.4445 - val_acc: 0.8095

Epoch 00003: val_loss improved from 0.68606 to 0.44452, saving model to ../models/base.model
Epoch 4/70
 - 113s - loss: 0.3904 - acc: 0.8329 - val_loss: 0.7777 - val_acc: 0.7363

Epoch 00004: val_loss did not improve from 0.44452
Epoch 5/70
 - 108s - loss: 0.4161 - acc: 0.8238 - val_loss: 2.3353 - val_acc: 0.6843

Epoch 00005: val_loss did not improve from 0.44452

Epoch 00005: ReduceLROnPlateau reducing learning rate to 0.0004000000189989805.
Epoch 6/70
 - 109s - loss: 0.3992 - acc: 0.8317 - val_loss: 0.8059 - val_acc: 0.7030

Epoch 00006: val_loss did not improve from 0.44452
Epoch 7/70
 - 107

Process ForkPoolWorker-240:
Process ForkPoolWorker-244:
Process ForkPoolWorker-236:
Process ForkPoolWorker-239:
Process ForkPoolWorker-238:
Process ForkPoolWorker-237:
Process ForkPoolWorker-242:
Process ForkPoolWorker-243:
Process ForkPoolWorker-241:
Process ForkPoolWorker-235:
Process ForkPoolWorker-246:
Traceback (most recent call last):
  File "/home/korhan/.conda/envs/keras/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/korhan/.conda/envs/keras/lib/python3.6/multiprocessing/synchronize.py", line 95, in __enter__
    return self._semlock.__enter__()
  File "/home/korhan/.conda/envs/keras/lib/python3.6/site-packages/keras_preprocessing/image/affine_transformations.py", line 330, in apply_affine_transform
    cval=cval) for x_channel in x]
  File "/home/korhan/.conda/envs/keras/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
Process ForkPoolWorker-245:
Traceback (most recent call

  File "/home/korhan/.conda/envs/keras/lib/python3.6/multiprocessing/synchronize.py", line 95, in __enter__
    return self._semlock.__enter__()
  File "/home/korhan/.conda/envs/keras/lib/python3.6/site-packages/keras_preprocessing/image/iterator.py", line 234, in _get_batches_of_transformed_samples
    x = self.image_data_generator.apply_transform(x, params)
  File "/home/korhan/.conda/envs/keras/lib/python3.6/site-packages/scipy/ndimage/interpolation.py", line 480, in affine_transform
    output, order, mode, cval, None, None)
KeyboardInterrupt
  File "/home/korhan/.conda/envs/keras/lib/python3.6/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/home/korhan/.conda/envs/keras/lib/python3.6/multiprocessing/connection.py", line 398, in _send_bytes
    self._send(buf)
  File "/home/korhan/.conda/envs/keras/lib/python3.6/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
KeyboardInterrupt
Keyboar

KeyboardInterrupt: 

  File "/home/korhan/.conda/envs/keras/lib/python3.6/site-packages/keras/utils/data_utils.py", line 401, in get_index
    return _SHARED_SEQUENCES[uid][i]
  File "/home/korhan/.conda/envs/keras/lib/python3.6/site-packages/keras_preprocessing/image/iterator.py", line 65, in __getitem__
    return self._get_batches_of_transformed_samples(index_array)
  File "/home/korhan/.conda/envs/keras/lib/python3.6/site-packages/keras_preprocessing/image/iterator.py", line 233, in _get_batches_of_transformed_samples
    params = self.image_data_generator.get_random_transform(x.shape)
  File "/home/korhan/.conda/envs/keras/lib/python3.6/site-packages/keras_preprocessing/image/image_data_generator.py", line 751, in get_random_transform
    tx = np.random.choice(self.height_shift_range)
KeyboardInterrupt
