In [1]:
import os, sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import skimage.io
from skimage.transform import resize
from imgaug import augmenters as iaa

import h5py

import PIL
from PIL import Image
import cv2
from sklearn.utils import class_weight, shuffle
import keras
import warnings
from keras.utils import Sequence
# Silence every Python warning for the rest of the session (this also hides deprecation notices).
warnings.filterwarnings("ignore")
SIZE = 512  # image side length in pixels: used by cv2.resize in the generator and for the model inputs
SEED = 777  # passed to np.random.seed before the train/validation split
THRESHOLD = 0.2  # cut-off applied to predictions inside the f1 metric

Using TensorFlow backend.


In [2]:
# Load dataset info
DIR = '../input/'
# Raw string: the Windows path contains backslashes that a normal literal would
# treat as (invalid) escape sequences such as "\H", "\i" and "\p".
# NOTE(review): this is an absolute, machine-specific path; adjust it per environment.
hdf5_path = r'D:\Human-Protein-Atlas-Image-Classification\input\proteins.h5'
# Training metadata table. The loader functions below re-read the CSV themselves;
# this frame is not referenced again in the visible cells.
data = pd.read_csv('../input/train.csv')

# train_dataset_info = []
# for name, labels in zip(data['Id'], data['Target'].str.split(' ')):
#     train_dataset_info.append({
#         'path':os.path.join(path_to_train, name),
#         'labels':np.array([int(label) for label in labels])})
# train_dataset_info = np.array(train_dataset_info)

In [3]:
def getTrainDataset():
    """Read ``train.csv`` and return ``(paths, labels)`` for the training set.

    Returns:
        paths: array of per-sample path prefixes inside the train directory
            (one entry per ``Id``).
        labels: array with one 28-element multi-hot row per sample; position
            ``k`` is 1 when ``k`` appears in the sample's space-separated
            ``Target`` string.
    """
    train_dir = DIR + '/train/'
    frame = pd.read_csv(DIR + '/train.csv')

    paths = [os.path.join(train_dir, sample_id) for sample_id in frame['Id']]

    labels = []
    for class_keys in frame['Target'].str.split(' '):
        multi_hot = np.zeros(28)
        for key in class_keys:
            multi_hot[int(key)] = 1
        labels.append(multi_hot)

    return np.array(paths), np.array(labels)

def getTestDataset():
    """Read ``sample_submission.csv`` and return ``(paths, labels)`` for the test set.

    Returns:
        paths: array of per-sample path prefixes inside the test directory.
        labels: placeholder array with one all-ones 28-element row per sample
            (the test set has no ground truth in this file).
    """
    test_dir = DIR + '/test/'
    frame = pd.read_csv(DIR + '/sample_submission.csv')

    paths = [os.path.join(test_dir, sample_id) for sample_id in frame['Id']]
    labels = [np.ones(28) for _ in paths]

    return np.array(paths), np.array(labels)
# Load the training path prefixes and multi-hot labels into the notebook namespace.
paths, labels = getTrainDataset()

In [4]:
# idx = 100
# batch_size = 32
# indexes = keys[idx * batch_size : (idx+1) * batch_size]
# with h5py.File(hdf5_path, "r") as f:
#     X = f["photos"][list(indexes)]

In [27]:
# credits: https://github.com/keras-team/keras/blob/master/keras/utils/data_utils.py#L302
# credits: https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly
from random import randint
class ProteinDataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` that yields ``(X, y)`` batches of protein images.

    Two data sources are supported, selected by ``use_hdf5``:

    * ``use_hdf5=True``: every entry of ``paths`` is an integer row index into
      the ``"photos"`` and ``"labels"`` datasets of the HDF5 file at the
      module-level ``hdf5_path``. ``labels`` is not consulted in this mode.
    * ``use_hdf5=False``: every entry of ``paths`` is a path prefix; one
      ``<prefix>_<channel>.png`` file is read per entry of ``channels`` and the
      targets come from ``labels``.

    Args:
        paths: row indices (HDF5 mode) or path prefixes (file mode).
        labels: per-sample targets, aligned with ``paths`` (file mode only).
        batch_size: number of samples per batch.
        shape: ``(height, width, ...)``; the first two entries size the cache
            and the resize target in file mode.
        channels: channel name suffixes, e.g. ``["green", "blue"]``.
        shuffle: reshuffle the sample order at the end of every epoch.
        use_cache: keep decoded images in memory (file mode only; the buffers
            are allocated for the whole dataset up front).
        augmentor: apply random augmentation to each image of a batch.
        use_hdf5: read batches from the HDF5 file instead of image files.
        val: stored on the instance; not used by the generator itself.
    """

    def __init__(self, paths, labels, batch_size, shape, channels = [], shuffle = False, use_cache = False, augmentor = False, use_hdf5 = False, val = False):
        self.paths, self.labels = paths, labels
        self.batch_size = batch_size
        self.shape = shape
        self.shuffle = shuffle
        self.use_cache = use_cache
        # Copy so neither the shared default list nor the caller's list is aliased.
        self.channels = list(channels)
        self.augmentor = augmentor
        self.use_hdf5 = use_hdf5
        # Kept for compatibility; no CLAHE step is applied when loading images.
        self.clahe = cv2.createCLAHE()
        self.val = val
        # Build the augmentation pipeline once instead of on every image.
        self._augmenter = self._build_augmenter()
        if use_cache:
            n_samples = len(paths)
            self.cache = np.zeros((n_samples, shape[0], shape[1], len(self.channels)))
            self.is_cached = np.zeros(n_samples, dtype=bool)
        self.on_epoch_end()

    def __len__(self):
        """Number of batches per epoch (the last batch may be smaller)."""
        return int(np.ceil(len(self.paths) / float(self.batch_size)))

    def __getitem__(self, idx):
        """Return batch number ``idx`` as ``(X, y)``."""
        positions = self.indexes[idx * self.batch_size : (idx+1) * self.batch_size]
        if self.use_hdf5:
            X, y = self._read_hdf5_batch(positions)
        else:
            X, y = self._read_file_batch(positions)
        if self.augmentor:
            for i, item in enumerate(X):
                X[i] = self.augment(item)
        return X, y

    def _read_hdf5_batch(self, positions):
        """Fetch the rows named by ``paths[positions]`` from the HDF5 file."""
        # Sorted because the row list handed to h5py is kept in increasing order,
        # exactly as the batch was sorted before indexing previously.
        rows = sorted(int(self.paths[pos]) for pos in positions)
        with h5py.File(hdf5_path, "r") as f:
            X = f["photos"][rows]
            y = f["labels"][rows]
        return X, y

    def _read_file_batch(self, positions):
        """Decode (or fetch from cache) the images at ``positions`` and pair them with labels."""
        images = []
        for pos in positions:
            if self.use_cache and self.is_cached[pos]:
                image = self.cache[pos]
            else:
                image = self.__load_image(self.paths[pos])
                if self.use_cache:
                    self.cache[pos] = image
                    self.is_cached[pos] = True
            images.append(image)
        # np.array copies, so in-place augmentation of the batch never touches the cache.
        X = np.array(images)
        y = np.array([self.labels[pos] for pos in positions])
        return X, y

    def on_epoch_end(self):
        """Reset (and optionally reshuffle) the sample order for the next epoch."""
        # Positions into self.paths; shuffling these leaves the caller's arrays untouched.
        self.indexes = np.arange(len(self.paths))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __iter__(self):
        """Create a generator that iterate over the Sequence."""
        for item in (self[i] for i in range(len(self))):
            yield item

    def __load_image(self, path):
        """Load every channel PNG for ``path`` and return an ``(H, W, C)`` array scaled by 1/255."""
        planes = []
        for channel in self.channels:
            # Context manager closes the file handle as soon as the pixels are copied.
            with Image.open(path + '_' + channel + '.png') as img:
                planes.append(np.array(img))

        # cv2.resize expects (width, height); use the shape this generator was configured with.
        target_size = (self.shape[1], self.shape[0])
        if len(self.channels) >= 2:
            im = np.stack(planes, -1)
            im = cv2.resize(im, target_size)
            im = np.divide(im, 255)
        else:
            # Single channel: resize the 2-D plane, then add the channel axis back.
            im = planes[0]
            im = cv2.resize(im, target_size)
            im = np.divide(im, 255)
            im = np.expand_dims(im, 2)
        return im

    @staticmethod
    def _build_augmenter():
        """Create the imgaug pipeline: exactly one of the listed transforms per image."""
        return iaa.Sequential([
            iaa.OneOf([
                iaa.Fliplr(0.5), # horizontal flips
                iaa.Flipud(0.5), # vertical flips
                iaa.Crop(percent=(0, 0.1)), # random crops
                # Small gaussian blur with random sigma between 0 and 0.5.
                # But we only blur about 50% of all images.
                iaa.Sometimes(0.5,
                    iaa.GaussianBlur(sigma=(0, 0.5))
                ),
                # Make some images brighter and some darker.
                # In 20% of all cases, we sample the multiplier once per channel,
                # which can end up changing the color of the images.
                iaa.Multiply((0.8, 1.2), per_channel=0.2),
                # Apply affine transformations to each image.
                # Scale/zoom them, translate/move them, rotate them and shear them.
                iaa.Affine(
                    scale={"x": (0.9, 1.1), "y": (0.9, 1.1)},
                    translate_percent={"x": (-0.1, 0.1), "y": (-0.1, 0.1)},
                    rotate=(-180, 180),
                    shear=(-4, 4)
                )
            ])], random_order=True)

    def augment(self, image):
        """Return ``image`` augmented with probability 1/2, otherwise unchanged."""
        if randint(0,1) == 1:
            return self._augmenter.augment_image(image)
        return image
    

In [28]:
# Shape tuple handed to ProteinDataGenerator; 512 equals SIZE and 4 equals the number of channel names used below.
SHAPE = (512, 512, 4)

In [7]:
# channels = ["red", "green", "blue"]
# for path in paths[0:10]:
#     images = []
#     for channel in channels:
#         im = np.array(Image.open(path + '_' + channel + '.png'))
# #         im = cv2.equalizeHist(im)
#         clahe = cv2.createCLAHE()
#         im = clahe.apply(im)
# #         plt.imshow(im)
#         images.append(im)

#     if len(channels) >= 2:
#         im = np.stack((
#             images
#         ), -1)
#         im = cv2.resize(im, (SIZE,SIZE))
#         im = np.divide(im, 255)
        
        
#     else:
#         im = images[0]
#         im = cv2.resize(im, (SIZE,SIZE))
#         im = np.divide(im, 255)
#         im = np.expand_dims(im, 2)
#     plt.imshow(augment(im))

In [8]:

# class data_generator:
    
#     def create_train(dataset_info, batch_size, shape, augument=True):
#         assert shape[2] == 3
#         while True:
#             dataset_info = shuffle(dataset_info)
#             for start in range(0, len(dataset_info), batch_size):
#                 end = min(start + batch_size, len(dataset_info))
#                 batch_images = []
#                 X_train_batch = dataset_info[start:end]
#                 batch_labels = np.zeros((len(X_train_batch), 28))
#                 for i in range(len(X_train_batch)):
#                     image = data_generator.load_image(
#                         X_train_batch[i]['path'], shape)   
#                     if augument:
#                         image = data_generator.augment(image)
#                     batch_images.append(image/255.)
#                     batch_labels[i][X_train_batch[i]['labels']] = 1
#                 yield np.array(batch_images, np.float32), batch_labels

#     def load_image(path, shape):
#         image_red_ch = Image.open(path+'_red.png')
#         image_yellow_ch = Image.open(path+'_yellow.png')
#         image_green_ch = Image.open(path+'_green.png')
#         image_blue_ch = Image.open(path+'_blue.png')
#         image = np.stack((
#         np.array(image_red_ch), 
#         np.array(image_green_ch), 
#         np.array(image_blue_ch)), -1)
#         image = cv2.resize(image, (shape[0], shape[1]))
#         return image

#     def augment(image):
#         augment_img = iaa.Sequential([
#             iaa.OneOf([
#                 iaa.Affine(rotate=0),
#                 iaa.Affine(rotate=90),
#                 iaa.Affine(rotate=180),
#                 iaa.Affine(rotate=270),
#                 iaa.Fliplr(0.5),
#                 iaa.Flipud(0.5),
#             ])], random_order=True)

#         image_aug = augment_img.augment_image(image)
#         return image_aug

In [9]:
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, load_model
from keras.layers import Activation, Dropout, Flatten, Dense, GlobalMaxPooling2D, BatchNormalization, Input, Conv2D, MaxPooling2D
from keras.applications.inception_v3 import InceptionV3
from keras.callbacks import ModelCheckpoint
from keras import metrics
from keras.optimizers import Adam 
from keras import backend as K
import keras
from keras.models import Model
from keras.utils import multi_gpu_model

In [10]:
def create_model(input_shape, n_out, channels):
    """Build an ImageNet-initialised InceptionV3 classifier with sigmoid outputs.

    The input has ``len(channels)`` channels; a 1x1 convolution maps it to the
    three channels InceptionV3 expects.

    Args:
        input_shape: accepted for signature compatibility; the spatial size is
            fixed at 299x299 regardless of this value.
        n_out: number of output units.
        channels: sequence whose length gives the number of input channels.

    Returns:
        An uncompiled Keras ``Model``.
    """
    input_tensor = Input(shape=(299,299,len(channels)))

    base_model = InceptionV3(include_top=False,
                   weights='imagenet',
                   input_shape=(299,299,3)
                            )
    bn = BatchNormalization()(input_tensor)
    x = Conv2D(3, kernel_size=(1,1), activation='relu', padding = "same")(bn)
    x = base_model(x)
    # Feed the normalised features forward; previously this layer's output was
    # assigned to an unused variable and therefore never entered the graph.
    x = BatchNormalization()(x)
    x = Conv2D(128, kernel_size=(1,1), activation='relu')(x)
    x = Flatten()(x)
    x = Dropout(0.5)(x)
    x = Dense(1024, activation='relu')(x)
    x = Dropout(0.5)(x)
    output = Dense(n_out, activation='sigmoid')(x)
    model = Model(input_tensor, output)

    return model

In [11]:
from __future__ import division

import six
from keras.models import Model
from keras.layers import (
    Input,
    Activation,
    Dense,
    Flatten
)
from keras.layers.convolutional import (
    Conv2D,
    MaxPooling2D,
    AveragePooling2D
)
from keras.layers.merge import add
from keras.layers.normalization import BatchNormalization
from keras.regularizers import l2
from keras import backend as K


def _bn_relu(input):
    """Apply batch normalisation along ``CHANNEL_AXIS`` followed by a ReLU activation."""
    normalised = BatchNormalization(axis=CHANNEL_AXIS)(input)
    relu = Activation("relu")
    return relu(normalised)


def _conv_bn_relu(**conv_params):
    """Return a callable that applies conv -> BN -> relu to a tensor.

    Required keywords: ``filters`` and ``kernel_size``. Optional keywords and
    their defaults: ``strides`` (1, 1), ``kernel_initializer`` "he_normal",
    ``padding`` "same", ``kernel_regularizer`` l2(1e-4).
    """
    filters = conv_params["filters"]
    kernel_size = conv_params["kernel_size"]
    strides = conv_params.get("strides", (1, 1))
    kernel_initializer = conv_params.get("kernel_initializer", "he_normal")
    padding = conv_params.get("padding", "same")
    kernel_regularizer = conv_params.get("kernel_regularizer", l2(1.e-4))

    def f(input):
        conv_layer = Conv2D(filters=filters, kernel_size=kernel_size,
                            strides=strides, padding=padding,
                            kernel_initializer=kernel_initializer,
                            kernel_regularizer=kernel_regularizer)
        return _bn_relu(conv_layer(input))

    return f


def _bn_relu_conv(**conv_params):
    """Return a callable that applies BN -> relu -> conv to a tensor.

    This is the pre-activation ordering proposed in
    http://arxiv.org/pdf/1603.05027v2.pdf. Keywords and defaults are the same
    as for ``_conv_bn_relu``: ``filters`` and ``kernel_size`` are required;
    ``strides``, ``kernel_initializer``, ``padding`` and ``kernel_regularizer``
    are optional.
    """
    filters = conv_params["filters"]
    kernel_size = conv_params["kernel_size"]
    strides = conv_params.get("strides", (1, 1))
    kernel_initializer = conv_params.get("kernel_initializer", "he_normal")
    padding = conv_params.get("padding", "same")
    kernel_regularizer = conv_params.get("kernel_regularizer", l2(1.e-4))

    def f(input):
        pre_activated = _bn_relu(input)
        conv_layer = Conv2D(filters=filters, kernel_size=kernel_size,
                            strides=strides, padding=padding,
                            kernel_initializer=kernel_initializer,
                            kernel_regularizer=kernel_regularizer)
        return conv_layer(pre_activated)

    return f


def _shortcut(input, residual):
    """Merge ``input`` and ``residual`` by element-wise sum.

    When the two tensors differ in spatial size or channel count, ``input`` is
    first projected with a strided 1x1 convolution so the shapes agree.
    """
    in_shape = K.int_shape(input)
    res_shape = K.int_shape(residual)
    # The ratios should be whole numbers when the architecture is configured correctly.
    row_stride = int(round(in_shape[ROW_AXIS] / res_shape[ROW_AXIS]))
    col_stride = int(round(in_shape[COL_AXIS] / res_shape[COL_AXIS]))
    channels_differ = in_shape[CHANNEL_AXIS] != res_shape[CHANNEL_AXIS]

    needs_projection = row_stride > 1 or col_stride > 1 or channels_differ
    if not needs_projection:
        # Identity shortcut.
        return add([input, residual])

    projected = Conv2D(filters=res_shape[CHANNEL_AXIS],
                       kernel_size=(1, 1),
                       strides=(row_stride, col_stride),
                       padding="valid",
                       kernel_initializer="he_normal",
                       kernel_regularizer=l2(0.0001))(input)
    return add([projected, residual])


def _residual_block(block_function, filters, repetitions, is_first_layer=False):
    """Return a callable that chains ``repetitions`` units built by ``block_function``.

    The first unit of every stage except the first one uses stride (2, 2);
    all other units use stride (1, 1).
    """
    def f(input):
        out = input
        for rep in range(repetitions):
            opens_stage = rep == 0
            downsample = opens_stage and not is_first_layer
            unit = block_function(filters=filters,
                                  init_strides=(2, 2) if downsample else (1, 1),
                                  is_first_block_of_first_layer=(is_first_layer and opens_stage))
            out = unit(out)
        return out

    return f


def basic_block(filters, init_strides=(1, 1), is_first_block_of_first_layer=False):
    """Two stacked 3x3 convolutions with a shortcut, for resnets with <= 34 layers.

    Uses the pre-activation scheme of http://arxiv.org/pdf/1603.05027v2.pdf.
    """
    def f(input):
        if not is_first_block_of_first_layer:
            first_conv = _bn_relu_conv(filters=filters, kernel_size=(3, 3),
                                       strides=init_strides)
        else:
            # The stem already ended with bn->relu->maxpool, so start with a bare conv.
            first_conv = Conv2D(filters=filters, kernel_size=(3, 3),
                                strides=init_strides,
                                padding="same",
                                kernel_initializer="he_normal",
                                kernel_regularizer=l2(1e-4))
        conv1 = first_conv(input)

        second_conv = _bn_relu_conv(filters=filters, kernel_size=(3, 3))
        return _shortcut(input, second_conv(conv1))

    return f


def bottleneck(filters, init_strides=(1, 1), is_first_block_of_first_layer=False):
    """1x1 -> 3x3 -> 1x1 bottleneck unit with a shortcut, for resnets with > 34 layers.

    Uses the pre-activation scheme of http://arxiv.org/pdf/1603.05027v2.pdf.
    The last convolution has ``filters * 4`` output channels.
    """
    def f(input):
        if not is_first_block_of_first_layer:
            reduce_conv = _bn_relu_conv(filters=filters, kernel_size=(1, 1),
                                        strides=init_strides)
        else:
            # The stem already ended with bn->relu->maxpool, so start with a bare conv.
            reduce_conv = Conv2D(filters=filters, kernel_size=(1, 1),
                                 strides=init_strides,
                                 padding="same",
                                 kernel_initializer="he_normal",
                                 kernel_regularizer=l2(1e-4))
        conv_1_1 = reduce_conv(input)

        spatial_conv = _bn_relu_conv(filters=filters, kernel_size=(3, 3))
        conv_3_3 = spatial_conv(conv_1_1)

        expand_conv = _bn_relu_conv(filters=filters * 4, kernel_size=(1, 1))
        return _shortcut(input, expand_conv(conv_3_3))

    return f


def _handle_dim_ordering():
    """Set the module-level ROW_AXIS / COL_AXIS / CHANNEL_AXIS for the active image layout.

    ``channels_last`` (the former ``'tf'`` ordering) gives axes (1, 2, 3);
    otherwise the channel axis comes first and the axes are (2, 3, 1).
    """
    global ROW_AXIS
    global COL_AXIS
    global CHANNEL_AXIS
    # image_data_format() is the Keras 2 name for the legacy image_dim_ordering().
    if K.image_data_format() == 'channels_last':
        ROW_AXIS = 1
        COL_AXIS = 2
        CHANNEL_AXIS = 3
    else:
        CHANNEL_AXIS = 1
        ROW_AXIS = 2
        COL_AXIS = 3


def _get_block(identifier):
    """Resolve a block function given either the callable itself or its global name.

    Raises:
        ValueError: if ``identifier`` is a string that names no (truthy) global.
    """
    if not isinstance(identifier, six.string_types):
        return identifier
    block = globals().get(identifier)
    if block:
        return block
    raise ValueError('Invalid {}'.format(identifier))


class ResnetBuilder(object):
    """Factory for ResNet-style Keras models with a sigmoid output layer."""

    @staticmethod
    def build(input_shape, num_outputs, block_fn, repetitions):
        """Builds a custom ResNet like architecture.
        Args:
            input_shape: The input shape in the form (nb_channels, nb_rows, nb_cols)
            num_outputs: The number of units in the final sigmoid layer
            block_fn: The block function to use. This is either `basic_block` or `bottleneck`
                (or the name of one of them as a string).
                The original paper used basic_block for layers < 50
            repetitions: Number of repetitions of various block units.
                At each block unit, the number of filters are doubled and the input size is halved
        Returns:
            The keras `Model`.
        Raises:
            Exception: if `input_shape` does not have exactly three entries.
        """
        _handle_dim_ordering()
        if len(input_shape) != 3:
            raise Exception("Input shape should be a tuple (nb_channels, nb_rows, nb_cols)")

        # Permute dimension order if necessary.
        # image_data_format() is the Keras 2 name for the legacy image_dim_ordering().
        if K.image_data_format() == 'channels_last':
            input_shape = (input_shape[1], input_shape[2], input_shape[0])

        # Load function from str if needed.
        block_fn = _get_block(block_fn)

        input = Input(shape=input_shape)
        conv1 = _conv_bn_relu(filters=64, kernel_size=(7, 7), strides=(2, 2))(input)
        pool1 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding="same")(conv1)

        block = pool1
        filters = 64
        for i, r in enumerate(repetitions):
            block = _residual_block(block_fn, filters=filters, repetitions=r, is_first_layer=(i == 0))(block)
            filters *= 2

        # Last activation
        block = _bn_relu(block)

        # Classifier block: pool over the whole remaining feature map.
        block_shape = K.int_shape(block)
        pool2 = AveragePooling2D(pool_size=(block_shape[ROW_AXIS], block_shape[COL_AXIS]),
                                 strides=(1, 1))(block)
        # Dropout is applied twice in a row, before and after flattening.
        pool2 = Dropout(0.5)(pool2)
        flatten1 = Flatten()(pool2)
        flatten1 = Dropout(0.5)(flatten1)
        dense = Dense(units=num_outputs, kernel_initializer="he_normal",
                      activation="sigmoid")(flatten1)

        model = Model(inputs=input, outputs=dense)
        return model

    @staticmethod
    def build_resnet_18(input_shape, num_outputs):
        """ResNet-18: basic blocks repeated [2, 2, 2, 2]."""
        return ResnetBuilder.build(input_shape, num_outputs, basic_block, [2, 2, 2, 2])

    @staticmethod
    def build_resnet_34(input_shape, num_outputs):
        """ResNet-34: basic blocks repeated [3, 4, 6, 3]."""
        return ResnetBuilder.build(input_shape, num_outputs, basic_block, [3, 4, 6, 3])

    @staticmethod
    def build_resnet_50(input_shape, num_outputs):
        """ResNet-50: bottleneck blocks repeated [3, 4, 6, 3]."""
        return ResnetBuilder.build(input_shape, num_outputs, bottleneck, [3, 4, 6, 3])

    @staticmethod
    def build_resnet_101(input_shape, num_outputs):
        """ResNet-101: bottleneck blocks repeated [3, 4, 23, 3]."""
        return ResnetBuilder.build(input_shape, num_outputs, bottleneck, [3, 4, 23, 3])

    @staticmethod
    def build_resnet_152(input_shape, num_outputs):
        """ResNet-152: bottleneck blocks repeated [3, 8, 36, 3]."""
        return ResnetBuilder.build(input_shape, num_outputs, bottleneck, [3, 8, 36, 3])

In [12]:
# Build a ResNet-18 with 28 outputs; the shape is given as (channels, rows, cols), the order ResnetBuilder.build expects.
model = ResnetBuilder.build_resnet_18((4, SIZE, SIZE), 28)

In [13]:
# Print the layer-by-layer summary of the ResNet built in the previous cell.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 512, 512, 4)  0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 256, 256, 64) 12608       input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 256, 256, 64) 256         conv2d_1[0][0]                   
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 256, 256, 64) 0           batch_normalization_1[0][0]      
__________________________________________________________________________________________________
max_poolin

In [14]:
def simple_model(input_shape, n_out, channels):
    """Build a small VGG-style CNN with sigmoid outputs.

    Six stages of two 3x3 ReLU convolutions followed by 2x2 max pooling; the
    first five stages use "same" padding, the last one "valid". The head is
    Flatten -> Dropout -> Dense(256) -> Dropout -> Dense(n_out, sigmoid).

    Args:
        input_shape: only the first two entries (spatial size) are used.
        n_out: number of output units.
        channels: sequence whose length gives the number of input channels.

    Returns:
        An uncompiled Keras ``Model``.
    """
    input_tensor = Input(shape=(input_shape[0], input_shape[1] ,len(channels)))
    x = BatchNormalization()(input_tensor)

    stages = [(8, "same"), (16, "same"), (32, "same"),
              (64, "same"), (128, "same"), (256, "valid")]
    for n_filters, pad_mode in stages:
        for _ in range(2):
            x = Conv2D(n_filters, kernel_size=(3,3), activation='relu', padding=pad_mode)(x)
        x = MaxPooling2D(pool_size = (2,2))(x)

    x = Flatten()(x)
    x = Dropout(0.5)(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)
    output = Dense(n_out, activation="sigmoid")(x)

    return Model(input_tensor, output)

In [15]:
def _macro_f1(y_true, y_pred):
    """Mean over classes of the per-class F1, with counts summed over the batch axis.

    ``y_pred`` may be hard 0/1 decisions or soft scores; the counts are formed
    from element-wise products, so soft scores yield a differentiable value.
    """
    tp = K.sum(K.cast(y_true*y_pred, 'float'), axis=0)
    fp = K.sum(K.cast((1-y_true)*y_pred, 'float'), axis=0)
    fn = K.sum(K.cast(y_true*(1-y_pred), 'float'), axis=0)

    # K.epsilon() keeps the denominators non-zero for classes absent from the batch.
    p = tp / (tp + fp + K.epsilon())
    r = tp / (tp + fn + K.epsilon())

    score = 2*p*r / (p+r+K.epsilon())
    # Replace any NaN entries with zero before averaging.
    score = tf.where(tf.is_nan(score), tf.zeros_like(score), score)
    return K.mean(score)


def f1(y_true, y_pred):
    """Macro F1 metric: predictions are binarised at ``THRESHOLD`` first.

    Relies on the module-level ``tf`` name being bound before the metric is
    evaluated.
    """
    y_pred = K.cast(K.greater(K.clip(y_pred, 0, 1), THRESHOLD), K.floatx())
    return _macro_f1(y_true, y_pred)


def f1_loss(y_true, y_pred):
    """Soft-F1 loss: one minus the macro F1 of the raw (unthresholded) predictions."""
    return 1-_macro_f1(y_true, y_pred)

In [29]:
# Split the dataset into train/validation row indices and build the batch generators.
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau
from sklearn.model_selection import train_test_split

epochs = 10
batch_size = 32
VAL_RATIO = .1
DEBUG = False

paths, labels = getTrainDataset()

# The generators read from HDF5, so samples are addressed by row index, not by file path.
# (The builtin int replaces the removed np.int alias; the dtype is the same.)
keys = np.arange(paths.shape[0], dtype=int)

np.random.seed(SEED)
lastTrainIndex = int((1-VAL_RATIO) * paths.shape[0])
if DEBUG:
    # Small subset for quick debugging runs; still row indices, as the HDF5 generators require.
    pathsTrain = keys[0:256]
    pathsVal = keys[lastTrainIndex:lastTrainIndex+256]
else:
    pathsTrain = keys[:lastTrainIndex]
    pathsVal = keys[lastTrainIndex:]
# In HDF5 mode the targets are read from the file; these arrays only fill the labels argument.
labelsTrain = pathsTrain
labelsVal = pathsVal

use_cache = False
channels = ["green", "blue", "red", "yellow"]
tg = ProteinDataGenerator(pathsTrain, labelsTrain, batch_size, SHAPE, channels, use_cache=use_cache, augmentor = True, use_hdf5 = True, shuffle = False)
vg = ProteinDataGenerator(pathsVal, labelsVal, batch_size, SHAPE, channels, use_cache=use_cache, augmentor = False, use_hdf5 = True, shuffle = False, val = True)

In [30]:
# create train and valid datagens
# train_generator = data_generator.create_train(
#     train_dataset_info[train_indexes], batch_size, (SIZE,SIZE,3), augument=True)
# validation_generator = data_generator.create_train(
#     train_dataset_info[valid_indexes], 32, (SIZE,SIZE,3), augument=False)

In [32]:
%%time
next(vg.__iter__())

Wall time: 612 ms


(array([[[[0.        , 0.        , 0.10196079, 0.        ],
          [0.        , 0.        , 0.13725491, 0.01568628],
          [0.        , 0.        , 0.10588235, 0.00784314],
          ...,
          [0.        , 0.        , 0.1254902 , 0.02352941],
          [0.01568628, 0.        , 0.14117648, 0.00392157],
          [0.01176471, 0.        , 0.09019608, 0.01568628]],
 
         [[0.00784314, 0.        , 0.11372549, 0.01960784],
          [0.00784314, 0.        , 0.08235294, 0.00392157],
          [0.00392157, 0.        , 0.2       , 0.01960784],
          ...,
          [0.        , 0.        , 0.13725491, 0.        ],
          [0.02352941, 0.        , 0.10588235, 0.00784314],
          [0.01568628, 0.        , 0.04705882, 0.03137255]],
 
         [[0.        , 0.        , 0.10196079, 0.        ],
          [0.00784314, 0.        , 0.09019608, 0.        ],
          [0.00392157, 0.        , 0.10980392, 0.        ],
          ...,
          [0.01176471, 0.        , 0.1764706 , 0.

In [33]:
# All three callbacks track validation F1 (higher is better):
# checkpoint the best full model, halve the LR after 10 stale epochs, stop after 20.
checkpoint = ModelCheckpoint('../working/InceptionV3.h5', monitor='val_f1', verbose=1, 
                             save_best_only=True, mode='max', save_weights_only = False)
# `min_delta` is the current name of the deprecated `epsilon` argument.
reduceLROnPlat = ReduceLROnPlateau(monitor='val_f1', factor=0.5, patience=10, 
                                   verbose=1, mode='max', min_delta=0.0001)
early = EarlyStopping(monitor="val_f1", 
                      mode="max", 
                      patience=20)
callbacks_list = [checkpoint, early, reduceLROnPlat]

In [34]:
def focal_loss(gamma=2., alpha=.25):
    """Return a focal-loss function ``focal_loss_fixed(y_true, y_pred)`` for probability outputs.

    Args:
        gamma: focusing exponent applied to the modulating factor.
        alpha: weight of the positive term; negatives are weighted ``1 - alpha``.
    """
    def focal_loss_fixed(y_true, y_pred):
        is_positive = tf.equal(y_true, 1)
        is_negative = tf.equal(y_true, 0)
        # Entries of the other class are filled with the value that zeroes their term
        # (before clipping), then everything is clipped away from 0 and 1 for the logs.
        pt_1 = K.clip(tf.where(is_positive, y_pred, tf.ones_like(y_pred)), 1e-3, .999)
        pt_0 = K.clip(tf.where(is_negative, y_pred, tf.zeros_like(y_pred)), 1e-3, .999)

        positive_term = K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1))
        negative_term = K.sum((1-alpha) * K.pow( pt_0, gamma) * K.log(1. - pt_0))
        return -positive_term-negative_term
    return focal_loss_fixed
def KerasFocalLoss(target, input):
    """Focal loss (gamma = 2) computed from logits.

    The unweighted term is the binary cross-entropy with logits,
    ``x - x*t + log(1 + exp(-x))``, evaluated with a log-sum-exp shift. It is
    then scaled by ``exp(gamma * log_sigmoid(-x * (2t - 1)))``, summed over the
    class axis and averaged over the batch.

    NOTE(review): the formula treats ``input`` as raw logits; the models in
    this notebook end in a sigmoid, so confirm before using it with them.
    """
    gamma = 2.
    input = tf.cast(input, tf.float32)

    # Shift by max(-x, 0) with no upper bound: capping the shift (it was clipped
    # to [0, 1]) leaves exp(-x - shift) free to overflow for very negative logits.
    max_val = K.relu(-input)
    loss = input - input * target + max_val + K.log(K.exp(-max_val) + K.exp(-input - max_val))
    invprobs = tf.log_sigmoid(-input * (target * 2.0 - 1.0))
    loss = K.exp(invprobs * gamma) * loss

    return K.mean(K.sum(loss, axis=1))

In [35]:
# # warm up model
# NOTE: `tf` is first imported here, although f1, f1_loss and the focal losses defined in
# earlier cells reference it; they only work once this cell has run.
import tensorflow as tf
# # with tf.device('/cpu:0'):
# Rebinds `model` (previously the ResNet-18) to the small CNN, which is what gets trained below.
model = simple_model(
    input_shape=(SIZE,SIZE,len(channels)), 
    n_out=28, channels = channels)

# # for layer in model.layers:
# #     layer.trainable = False
# # model.layers[1].trainable = True
# # model.layers[2].trainable = True
# # model.layers[-1].trainable = True
# # model.layers[-2].trainable = True
# # model.layers[-3].trainable = True
# # model.layers[-4].trainable = True
# # model.layers[-5].trainable = True
# # model.layers[-6].trainable = True

# model.summary()
# # model = multi_gpu_model(model, gpus = 2)

In [36]:
# Compile with binary cross-entropy and Adam at 1e-3; the thresholded f1 metric
# produces the `val_f1` value that the callbacks monitor.
model.compile(
    loss="binary_crossentropy", 
    optimizer=Adam(1e-03),
    metrics=['binary_accuracy', f1])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         (None, 512, 512, 4)       0         
_________________________________________________________________
batch_normalization_19 (Batc (None, 512, 512, 4)       16        
_________________________________________________________________
conv2d_33 (Conv2D)           (None, 512, 512, 8)       296       
_________________________________________________________________
conv2d_34 (Conv2D)           (None, 512, 512, 8)       584       
_________________________________________________________________
max_pooling2d_8 (MaxPooling2 (None, 256, 256, 8)       0         
_________________________________________________________________
conv2d_35 (Conv2D)           (None, 256, 256, 16)      1168      
_________________________________________________________________
conv2d_36 (Conv2D)           (None, 256, 256, 16)      2320      
__________

In [None]:
# len() of a Sequence is its number of batches per epoch, so the step counts are
# exact integers tied to the batch size the generators were actually built with.
hist =  model.fit_generator(
        tg,
        steps_per_epoch=len(tg),
        validation_data=vg,
        validation_steps=len(vg),
        epochs=100, 
        verbose=1,
        callbacks = callbacks_list,
        max_queue_size=50)

Epoch 1/100

Epoch 00001: val_f1 improved from -inf to 0.05340, saving model to ../working/InceptionV3.h5
Epoch 2/100

Epoch 00002: val_f1 improved from 0.05340 to 0.06000, saving model to ../working/InceptionV3.h5
Epoch 3/100

Epoch 00003: val_f1 improved from 0.06000 to 0.10304, saving model to ../working/InceptionV3.h5
Epoch 4/100

Epoch 00004: val_f1 improved from 0.10304 to 0.12146, saving model to ../working/InceptionV3.h5
Epoch 5/100

Epoch 00005: val_f1 improved from 0.12146 to 0.15328, saving model to ../working/InceptionV3.h5
Epoch 6/100

Epoch 00006: val_f1 improved from 0.15328 to 0.15815, saving model to ../working/InceptionV3.h5
Epoch 7/100

Epoch 00007: val_f1 improved from 0.15815 to 0.19095, saving model to ../working/InceptionV3.h5
Epoch 8/100

Epoch 00008: val_f1 did not improve from 0.19095
Epoch 9/100

Epoch 00009: val_f1 improved from 0.19095 to 0.21158, saving model to ../working/InceptionV3.h5
Epoch 10/100

Epoch 00010: val_f1 improved from 0.21158 to 0.21778, s


Epoch 00031: val_f1 did not improve from 0.27125
Epoch 32/100

Epoch 00032: val_f1 improved from 0.27125 to 0.27344, saving model to ../working/InceptionV3.h5
Epoch 33/100

Epoch 00033: val_f1 did not improve from 0.27344
Epoch 34/100

Epoch 00034: val_f1 improved from 0.27344 to 0.27365, saving model to ../working/InceptionV3.h5
Epoch 35/100

Epoch 00035: val_f1 did not improve from 0.27365
Epoch 36/100

Epoch 00036: val_f1 did not improve from 0.27365
Epoch 37/100

Epoch 00037: val_f1 improved from 0.27365 to 0.28599, saving model to ../working/InceptionV3.h5
Epoch 38/100

Epoch 00038: val_f1 did not improve from 0.28599
Epoch 39/100

Epoch 00039: val_f1 did not improve from 0.28599
Epoch 40/100

Epoch 00040: val_f1 did not improve from 0.28599
Epoch 41/100

Epoch 00041: val_f1 did not improve from 0.28599
Epoch 42/100

Epoch 00042: val_f1 did not improve from 0.28599
Epoch 43/100

Epoch 00043: val_f1 did not improve from 0.28599
Epoch 44/100

Epoch 00044: val_f1 did not improve fro


Epoch 00063: val_f1 did not improve from 0.28978
Epoch 64/100

Epoch 00064: val_f1 did not improve from 0.28978
Epoch 65/100

Epoch 00065: val_f1 did not improve from 0.28978
Epoch 66/100

Epoch 00066: val_f1 did not improve from 0.28978

Epoch 00066: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 67/100

Epoch 00067: val_f1 did not improve from 0.28978
Epoch 68/100

Epoch 00068: val_f1 improved from 0.28978 to 0.29055, saving model to ../working/InceptionV3.h5
Epoch 69/100

Epoch 00069: val_f1 did not improve from 0.29055
Epoch 70/100

Epoch 00070: val_f1 improved from 0.29055 to 0.29171, saving model to ../working/InceptionV3.h5
Epoch 71/100

Epoch 00071: val_f1 improved from 0.29171 to 0.29239, saving model to ../working/InceptionV3.h5
Epoch 72/100

Epoch 00072: val_f1 did not improve from 0.29239
Epoch 73/100

Epoch 00073: val_f1 improved from 0.29239 to 0.29272, saving model to ../working/InceptionV3.h5
Epoch 74/100

Epoch 00074: val_f1 improved from 0.2

In [None]:
# train all layers

for layer in model.layers:
    print(layer)
    layer.trainable = True
# Recompile so the trainable flags take effect, now with the focal loss.
# 'binary_accuracy' is named explicitly: with a custom loss and 28 outputs the
# generic 'accuracy' alias resolves to categorical accuracy, which is not
# meaningful for multi-label targets.
model.compile(loss=focal_loss(),
            optimizer=Adam(lr=1e-4),
            metrics=['binary_accuracy', f1])

In [127]:

batch_size = 12
# NOTE(review): tg and vg were constructed with the earlier batch size; reassigning
# `batch_size` here does not change the size of the batches they yield.
# Step counts must be whole numbers of batches, so round up once after halving
# (this gives the same number of steps the fractional value resulted in).
hist =  model.fit_generator(
        tg,
        steps_per_epoch=int(np.ceil(float(len(pathsTrain)) / float(batch_size) / 2)),
        validation_data=vg,
        validation_steps=int(np.ceil(float(len(pathsVal)) / float(batch_size) / 2)),
        epochs=200, 
        verbose=1,
        callbacks=callbacks_list)
    

Epoch 1/200


StopIteration: 'list' object has no attribute 'shape'

In [None]:
# model.compile(loss=f1_loss,
#             optimizer=Adam(lr=1e-4),
#             metrics=['accuracy', f1])
# hist =  model.fit_generator(
#         tg,
#         steps_per_epoch=np.ceil(float(len(pathsTrain)) / float(batch_size))/2,
#         validation_data=vg,
#         validation_steps=np.ceil(float(len(pathsVal)) / float(batch_size))/2,
#         epochs=200, 
#         verbose=1,
#         callbacks=callbacks_list)

In [None]:
# Training curves from the last fit: loss on the left, F1 on the right.
fig, ax = plt.subplots(1, 2, figsize=(15,5))
ax[0].set_title('loss')
ax[0].plot(hist.epoch, hist.history["loss"], label="Train loss")
ax[0].plot(hist.epoch, hist.history["val_loss"], label="Validation loss")
# The right panel plots the f1 metric, so title it accordingly.
ax[1].set_title('f1')
ax[1].plot(hist.epoch, hist.history["f1"], label="Train F1")
ax[1].plot(hist.epoch, hist.history["val_f1"], label="Validation F1")
for axis in ax:
    axis.set_xlabel('epoch')
    axis.legend()

In [None]:
from keras.models import load_model

# Reload the checkpoint written during training. The custom metric and loss
# callables are passed by name so the saved model can be deserialized.
bestModel = load_model(
    '../working/InceptionV3.h5',
    custom_objects={
        'f1': f1,
        'f1_loss': f1_loss,
        'focal_loss_fixed': focal_loss(),
    },
)

In [None]:
from tqdm import tqdm

# Run the reloaded model over every validation batch, keeping the predicted
# scores and the matching ground-truth labels.
#
# Batches are gathered in lists and joined once at the end: calling np.append
# inside the loop re-copies the whole accumulated array on every iteration.
# Each list is seeded with an empty (0, 28) float array so the result keeps
# the same shape and dtype as before, including when vg yields no batches.
val_pred_batches = [np.empty((0, 28))]
val_label_batches = [np.empty((0, 28))]
for i in tqdm(range(len(vg))):
    im, lbl = vg[i]
    val_pred_batches.append(bestModel.predict(im))
    val_label_batches.append(lbl)

lastFullValPred = np.concatenate(val_pred_batches, axis=0)
lastFullValLabels = np.concatenate(val_label_batches, axis=0)
print(lastFullValPred.shape, lastFullValLabels.shape)

In [None]:
from sklearn.metrics import f1_score as off1

# Sweep candidate decision thresholds 0.000, 0.001, ..., 0.999 and score each
# of the 28 classes independently: f1s[j, i] is the binary F1 of class i when
# its validation scores are cut at rng[j].
rng = np.arange(0, 1, 0.001)
f1s = np.zeros((rng.shape[0], 28))
for j, t in enumerate(tqdm(rng)):
    # Binarize all validation scores at this threshold in one step.
    hard_preds = (lastFullValPred > t).astype(np.int8)
    for i in range(28):
        f1s[j, i] = off1(lastFullValLabels[:, i], hard_preds[:, i], average='binary')

In [None]:
# Best F1 reached by each class anywhere in the threshold sweep.
best_f1_per_class = np.max(f1s, axis=0)

print('Individual F1-scores for each class:')
print(best_f1_per_class)
print('Macro F1-score CV =', np.mean(best_f1_per_class))
plt.plot(rng, f1s)

# For every class keep the first (i.e. lowest) threshold that attains its
# best F1; argmax returns the first index on ties.
T = rng[np.argmax(f1s, axis=0)]
print('Probability threshold maximizing CV F1-score for each class:')
print(T)

In [None]:
# Score the test set with the reloaded model; row k of P holds the 28 class
# scores for pathsTest[k].
pathsTest, labelsTest = getTestDataset()

testg = ProteinDataGenerator(pathsTest, labelsTest, batch_size, SHAPE, channels)
submit = pd.read_csv(DIR + '/sample_submission.csv')
P = np.zeros((pathsTest.shape[0], 28))

# NOTE(review): assumes testg yields batches in the same order as pathsTest
# (no shuffling) - confirm against ProteinDataGenerator.
offset = 0
for i in tqdm(range(len(testg))):
    # Take only the images: unpacking into `labels` would overwrite the
    # notebook-level training `labels` with the dummy test labels.
    images = testg[i][0]
    score = bestModel.predict(images)
    # Track the write position from the batch sizes actually returned rather
    # than assuming every batch holds exactly batch_size rows.
    P[offset:offset + score.shape[0]] = score
    offset += score.shape[0]

In [None]:
# Work on a separate copy of the raw test scores so P itself stays untouched.
PP = np.array(P, copy=True)

In [None]:
# Convert the per-class scores into the space-separated label string of each
# submission row and write the submission file.
prediction = []

for row in tqdm(range(submit.shape[0])):
    # A class is kept unless its score is strictly below that class's threshold.
    kept_labels = [
        str(col)
        for col in range(PP.shape[1])
        if not PP[row, col] < T[col]
    ]
    prediction.append(' '.join(kept_labels))

submit['Predicted'] = np.array(prediction)
submit.to_csv('transfer_1x1conv_aug_focal_loss.csv', index=False)

In [None]:
# from tqdm import tqdm
# pathsTest, labelsTest = getTestDataset()

# testg = ProteinDataGenerator(pathsTest, labelsTest, batch_size, SHAPE)
# submit = pd.read_csv(DIR + '/sample_submission.csv')
# P = np.zeros((pathsTest.shape[0], 28))
# for i in tqdm(range(len(testg))):
#     images, labels = testg[i]
#     score = bestModel.predict(images)
#     P[i*batch_size:i*batch_size+score.shape[0]] = score

In [None]:
# PP = np.array(P)

In [None]:
# prediction = []

# for row in tqdm(range(submit.shape[0])):
    
#     str_label = ''
    
#     for col in range(PP.shape[1]):
#         if(PP[row, col] < .2):   # to account for losing TP is more costly than decreasing FP
#             #print(PP[row])
#             str_label += ''
#         else:
#             str_label += str(col) + ' '
#     prediction.append(str_label.strip())
    
# submit['Predicted'] = np.array(prediction)
# submit.to_csv('datagenerator_model_v2.csv', index=False)