In [1]:
import os, sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import skimage.io
from skimage.transform import resize
from imgaug import augmenters as iaa

import PIL
from PIL import Image
import cv2
from sklearn.utils import class_weight, shuffle
import keras
import warnings
from keras.utils import Sequence
warnings.filterwarnings("ignore")
# Side length (pixels) passed as `input_shape=(SIZE, SIZE, ...)` to the model builders below.
SIZE = 299
# Random seed constant. NOTE(review): not referenced anywhere in the visible notebook.
SEED = 777
# Cut-off used by the `f1` metric to binarize predicted probabilities.
THRESHOLD = 0.2

Using TensorFlow backend.


In [2]:
# Load dataset info
# Root directory of the competition input data (note the trailing slash).
DIR = '../input/'
# Training metadata table. The dataset helper functions below re-read the CSV
# themselves rather than using this frame.
data = pd.read_csv('../input/train.csv')

# train_dataset_info = []
# for name, labels in zip(data['Id'], data['Target'].str.split(' ')):
#     train_dataset_info.append({
#         'path':os.path.join(path_to_train, name),
#         'labels':np.array([int(label) for label in labels])})
# train_dataset_info = np.array(train_dataset_info)

In [3]:
def getTrainDataset():
    """Build the training sample paths and their multi-hot label vectors.

    Reads ``train.csv`` under ``DIR``. For every row, the ``Id`` column is
    joined onto the train image directory (giving a path *prefix* without a
    channel suffix or extension) and the space-separated class indices in
    ``Target`` are turned into a length-28 vector with ones at those indices.

    Returns:
        tuple: ``(paths, labels)`` as NumPy arrays, one entry per CSV row.
    """
    train_dir = DIR + '/train/'
    train_df = pd.read_csv(DIR + '/train.csv')

    # One path prefix per sample id.
    paths = [os.path.join(train_dir, image_id) for image_id in train_df['Id']]

    # Multi-hot encode the class indices of each sample.
    labels = []
    for class_ids in train_df['Target'].str.split(' '):
        multi_hot = np.zeros(28)
        multi_hot[[int(class_id) for class_id in class_ids]] = 1
        labels.append(multi_hot)

    return np.array(paths), np.array(labels)

def getTestDataset():
    """Build the test sample paths with placeholder label vectors.

    Reads ``sample_submission.csv`` under ``DIR`` and joins every ``Id`` onto
    the test image directory. Each sample gets a length-28 vector of ones as
    its label.

    Returns:
        tuple: ``(paths, labels)`` as NumPy arrays, one entry per CSV row.
    """
    test_dir = DIR + '/test/'
    submission_df = pd.read_csv(DIR + '/sample_submission.csv')

    paths = [os.path.join(test_dir, image_id) for image_id in submission_df['Id']]
    # All-ones vectors, one per test sample.
    labels = [np.ones(28) for _ in paths]

    return np.array(paths), np.array(labels)
# Materialize the training path prefixes and multi-hot labels as notebook-level variables.
paths, labels = getTrainDataset()

In [4]:
# credits: https://github.com/keras-team/keras/blob/master/keras/utils/data_utils.py#L302
# credits: https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly
from random import randint
class ProteinDataGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` producing batches of multi-channel images.

    Every sample is identified by a path prefix; for each name in
    ``channels`` the file ``<prefix>_<channel>.png`` is read, the channels are
    stacked on the last axis and the pixel values are divided by 255. Images
    are used at their stored resolution (no resizing is performed).

    ``__getitem__`` returns ``(X, X)``: the image batch is both the input and
    the target. ``labels`` is stored but is not part of the returned batch.

    Args:
        paths: NumPy array of sample path prefixes.
        labels: NumPy array of per-sample labels (stored only).
        batch_size: number of samples per batch.
        shape: ``(height, width, n_channels)`` of one sample.
        channels: list of channel-name suffixes to load per sample.
        shuffle: reshuffle the sample order at the end of every epoch.
        use_cache: keep loaded images in an in-memory array for later epochs.
        augmentor: apply ``augment`` to each image when iterating with ``iter()``.
    """

    def __init__(self, paths, labels, batch_size, shape, channels = [], shuffle = False, use_cache = False, augmentor = False):
        self.paths, self.labels = paths, labels
        self.batch_size = batch_size
        self.shape = shape
        self.shuffle = shuffle
        self.use_cache = use_cache
        self.channels = channels
        self.augmentor = augmentor
        if self.use_cache:
            # One slot per sample plus a flag telling whether the slot is filled.
            self.cache = np.zeros((paths.shape[0], shape[0], shape[1], len(channels)))
            self.is_cached = np.zeros((paths.shape[0]))
        self.on_epoch_end()

    def __len__(self):
        """Number of batches per epoch (the last batch may be smaller)."""
        return int(np.ceil(len(self.paths) / float(self.batch_size)))

    def __getitem__(self, idx):
        """Return batch number ``idx`` as an ``(X, X)`` tuple."""
        indexes = self.indexes[idx * self.batch_size : (idx+1) * self.batch_size]
        paths = self.paths[indexes]

        if self.use_cache:
            # Walk the batch position and the global sample index together so
            # that each missing image lands in its own cache slot. (Enumerating
            # only the uncached subset would misalign positions as soon as a
            # batch is partially cached.)
            for pos, sample_idx in enumerate(indexes):
                if self.is_cached[sample_idx] == 0:
                    self.cache[sample_idx] = self.__load_image(paths[pos])
                    self.is_cached[sample_idx] = 1
            # Fancy indexing copies, so callers cannot corrupt the cache.
            X = self.cache[indexes]
        else:
            X = np.zeros((paths.shape[0], self.shape[0], self.shape[1], self.shape[2]))
            for pos, path in enumerate(paths):
                X[pos] = self.__load_image(path)

        # Input and target are the same array.
        return X, X

    def on_epoch_end(self):
        """Reset the sample order, reshuffling it when ``shuffle`` is enabled."""
        self.indexes = np.arange(len(self.paths))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __iter__(self):
        """Create a generator that iterate over the Sequence."""
        for batch_idx in range(len(self)):
            images, targets = self[batch_idx]
            if self.augmentor:
                # `augment` works on a single image, so apply it per sample
                # instead of handing it the whole (inputs, targets) tuple.
                augmented = np.stack([self.augment(image) for image in images])
                # __getitem__ hands back the same array twice; keep input and
                # target paired after augmentation.
                if targets is images:
                    targets = augmented
                images = augmented
            yield images, targets

    def __load_image(self, path):
        """Load all configured channels for one sample, scaled by 1/255.

        Returns an array whose last axis indexes the channels (a trailing
        axis of size 1 is added when only one channel is configured).
        """
        images = []
        for channel in self.channels:
            # Context manager closes the file handle as soon as the pixels are copied.
            with Image.open(path + '_' + channel + '.png') as channel_image:
                images.append(np.array(channel_image))

        if len(self.channels) >= 2:
            im = np.stack(images, -1)
        else:
            im = np.expand_dims(images[0], 2)
        return np.divide(im, 255)

    def augment(self, image):
        """Return ``image`` unchanged or randomly augmented (50/50).

        When augmenting, exactly one of the augmenters listed in ``OneOf`` is
        applied.
        NOTE(review): images produced by this class are floats after the
        division by 255 — confirm the installed imgaug version supports
        float input for these augmenters.
        """
        if randint(0,1) == 1:
            augment_img = iaa.Sequential([
                iaa.OneOf([
                    iaa.Fliplr(0.5), # horizontal flips
                    iaa.Flipud(0.5), # vertical flips
                    iaa.Crop(percent=(0, 0.1)), # random crops
                    # Small gaussian blur with random sigma between 0 and 0.5,
                    # applied with probability 0.5 when this branch is chosen.
                    iaa.Sometimes(0.5,
                        iaa.GaussianBlur(sigma=(0, 0.5))
                    ),
                    # Make some images brighter and some darker.
                    # In 20% of all cases, we sample the multiplier once per channel,
                    # which can end up changing the color of the images.
                    iaa.Multiply((0.8, 1.2), per_channel=0.2),
                    # Apply affine transformations to each image.
                    # Scale/zoom them, translate/move them, rotate them and shear them.
                    iaa.Affine(
                        scale={"x": (0.9, 1.1), "y": (0.9, 1.1)},
                        translate_percent={"x": (-0.1, 0.1), "y": (-0.1, 0.1)},
                        rotate=(-180, 180),
                        shear=(-4, 4)
                    )
                ])], random_order=True)

            return augment_img.augment_image(image)
        else:
            return image
    

In [5]:
# (height, width, channels) of one sample; passed to ProteinDataGenerator as `shape`.
SHAPE = (512, 512, 4)

In [6]:
# channels = ["red", "green", "blue"]
# for path in paths[0:10]:
#     images = []
#     for channel in channels:
#         im = np.array(Image.open(path + '_' + channel + '.png'))
# #         im = cv2.equalizeHist(im)
#         clahe = cv2.createCLAHE()
#         im = clahe.apply(im)
# #         plt.imshow(im)
#         images.append(im)

#     if len(channels) >= 2:
#         im = np.stack((
#             images
#         ), -1)
#         im = cv2.resize(im, (SIZE,SIZE))
#         im = np.divide(im, 255)
        
        
#     else:
#         im = images[0]
#         im = cv2.resize(im, (SIZE,SIZE))
#         im = np.divide(im, 255)
#         im = np.expand_dims(im, 2)
#     plt.imshow(augment(im))

In [7]:

# class data_generator:
    
#     def create_train(dataset_info, batch_size, shape, augument=True):
#         assert shape[2] == 3
#         while True:
#             dataset_info = shuffle(dataset_info)
#             for start in range(0, len(dataset_info), batch_size):
#                 end = min(start + batch_size, len(dataset_info))
#                 batch_images = []
#                 X_train_batch = dataset_info[start:end]
#                 batch_labels = np.zeros((len(X_train_batch), 28))
#                 for i in range(len(X_train_batch)):
#                     image = data_generator.load_image(
#                         X_train_batch[i]['path'], shape)   
#                     if augument:
#                         image = data_generator.augment(image)
#                     batch_images.append(image/255.)
#                     batch_labels[i][X_train_batch[i]['labels']] = 1
#                 yield np.array(batch_images, np.float32), batch_labels

#     def load_image(path, shape):
#         image_red_ch = Image.open(path+'_red.png')
#         image_yellow_ch = Image.open(path+'_yellow.png')
#         image_green_ch = Image.open(path+'_green.png')
#         image_blue_ch = Image.open(path+'_blue.png')
#         image = np.stack((
#         np.array(image_red_ch), 
#         np.array(image_green_ch), 
#         np.array(image_blue_ch)), -1)
#         image = cv2.resize(image, (shape[0], shape[1]))
#         return image

#     def augment(image):
#         augment_img = iaa.Sequential([
#             iaa.OneOf([
#                 iaa.Affine(rotate=0),
#                 iaa.Affine(rotate=90),
#                 iaa.Affine(rotate=180),
#                 iaa.Affine(rotate=270),
#                 iaa.Fliplr(0.5),
#                 iaa.Flipud(0.5),
#             ])], random_order=True)

#         image_aug = augment_img.augment_image(image)
#         return image_aug

In [8]:
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, load_model
from keras.layers import Activation, Dropout, Flatten, Dense, GlobalMaxPooling2D, BatchNormalization, Input, Conv2D, MaxPooling2D
from keras.applications.inception_v3 import InceptionV3
from keras.callbacks import ModelCheckpoint
from keras import metrics
from keras.optimizers import Adam 
from keras import backend as K
import keras
from keras.models import Model
from keras.utils import multi_gpu_model

In [9]:
def create_model(input_shape, n_out, channels):
    """Build a classifier on top of an ImageNet-pretrained InceptionV3 backbone.

    The input is batch-normalized and projected to 3 channels with a 1x1
    convolution so it fits the backbone's expected input, then passed through
    InceptionV3 (without its top), a 1x1 convolution, and a dense head ending
    in ``n_out`` sigmoid units.

    Args:
        input_shape: accepted for interface compatibility but not used; the
            spatial size is fixed at 299x299.
        n_out: number of sigmoid output units.
        channels: sequence of channel names; only its length is used, as the
            number of input channels.

    Returns:
        An uncompiled Keras ``Model``.
    """
    input_tensor = Input(shape=(299,299,len(channels)))

    base_model = InceptionV3(include_top=False,
                   weights='imagenet',
                   input_shape=(299,299,3)
                            )
    bn = BatchNormalization()(input_tensor)
    # 1x1 convolution maps len(channels) input channels to the 3 the backbone takes.
    x = Conv2D(3, kernel_size=(1,1), activation='relu', padding = "same")(bn)
    x = base_model(x)
    # A BatchNormalization layer used to be instantiated here, but its output
    # was never fed to any later layer, so it was not part of the returned
    # model. It has been removed; the resulting graph is unchanged.
    x = Conv2D(128, kernel_size=(1,1), activation='relu')(x)
    x = Flatten()(x)
    x = Dropout(0.5)(x)
    x = Dense(1024, activation='relu')(x)
    x = Dropout(0.5)(x)
    output = Dense(n_out, activation='sigmoid')(x)
    model = Model(input_tensor, output)

    return model

In [10]:
def simple_model(input_shape, n_out, channels):
    """Build a small VGG-style CNN classifier.

    The network is batch normalization, then six stages of two 3x3
    convolutions followed by 2x2 max pooling (filters 8, 16, 32, 64, 128 with
    "same" padding, then 256 with "valid" padding), then a dropout/dense head
    ending in ``n_out`` sigmoid units.

    Args:
        input_shape: accepted but not used; the input is fixed at 299x299.
        n_out: number of sigmoid output units.
        channels: sequence of channel names; only its length is used.

    Returns:
        An uncompiled Keras ``Model``.
    """
    inputs = Input(shape=(299,299,len(channels)))
    x = BatchNormalization()(inputs)

    # (filters, padding) for each conv-conv-pool stage, in order.
    stages = [(8, "same"), (16, "same"), (32, "same"),
              (64, "same"), (128, "same"), (256, "valid")]
    for n_filters, pad in stages:
        for _ in range(2):
            x = Conv2D(n_filters, kernel_size=(3,3), activation='relu', padding = pad)(x)
        x = MaxPooling2D(pool_size = (2,2))(x)

    # Classification head.
    x = Flatten()(x)
    x = Dropout(0.5)(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)
    predictions = Dense(n_out, activation="sigmoid")(x)

    return Model(inputs, predictions)

In [11]:
from keras.layers import Conv2DTranspose
def vae_model(input_shape, n_out, channels):
    """Build a convolutional autoencoder for 512x512 images.

    Despite the name, the graph contains no sampling layer: it is a plain
    encoder/decoder. The encoder narrows the channel count 8 -> 7 -> 6, halves
    the resolution with max pooling, then narrows 5 -> 4 -> 3 (sigmoid). The
    decoder mirrors this with transposed convolutions, upsampling with a
    stride-2 layer, and ends in a 4-channel sigmoid output.

    Args:
        input_shape: accepted but not used; the input is fixed at 512x512.
        n_out: accepted but not used.
        channels: sequence of channel names; only its length is used, as the
            number of input channels.

    Returns:
        An uncompiled Keras ``Model`` mapping the input image to its
        reconstruction.
    """
    inputs = Input(shape=(512,512,len(channels)))

    # ---- encoder ----
    x = inputs
    for n_filters in (8, 7, 6):
        x = Conv2D(n_filters, kernel_size=(3,3), activation='relu', padding = "same")(x)
    x = MaxPooling2D(pool_size = (2,2))(x)
    for n_filters in (5, 4):
        x = Conv2D(n_filters, kernel_size=(3,3), activation='relu', padding = "same")(x)
    encoded = Conv2D(3, kernel_size=(3,3), activation='sigmoid', padding = "same")(x)

    # ---- decoder ----
    x = encoded
    for n_filters in (4, 5):
        x = Conv2DTranspose(n_filters, kernel_size=(3,3), activation='relu', padding = "same")(x)
    # Stride 2 undoes the 2x2 pooling of the encoder.
    x = Conv2DTranspose(6, kernel_size=(3,3), strides = (2, 2), activation='relu', padding = "same")(x)
    for n_filters in (7, 8):
        x = Conv2DTranspose(n_filters, kernel_size=(3,3), activation='relu', padding = "same")(x)
    decoded = Conv2DTranspose(4, kernel_size=(3,3), activation='sigmoid', padding = "same")(x)

    return Model(inputs, decoded)

In [12]:
def f1(y_true, y_pred):
    """Macro-averaged F1 metric on thresholded predictions.

    Predictions are clipped to [0, 1] and binarized at ``THRESHOLD``; true
    positives, false positives and false negatives are summed over axis 0,
    an F1 score is computed per remaining position, and the mean of those
    scores is returned.

    Args:
        y_true: tensor of ground-truth indicators.
        y_pred: tensor of predicted scores, same shape as ``y_true``.

    Returns:
        Scalar tensor with the mean F1 score.
    """
    y_pred = K.cast(K.greater(K.clip(y_pred, 0, 1), THRESHOLD), K.floatx())
    tp = K.sum(K.cast(y_true*y_pred, 'float'), axis=0)
    fp = K.sum(K.cast((1-y_true)*y_pred, 'float'), axis=0)
    fn = K.sum(K.cast(y_true*(1-y_pred), 'float'), axis=0)

    p = tp / (tp + fp + K.epsilon())
    r = tp / (tp + fn + K.epsilon())

    score = 2*p*r / (p+r+K.epsilon())
    # Replace NaN entries by 0 using only the Keras backend (this notebook
    # never imports `tf`). NaN is the only value not equal to itself, so
    # equal(score, score) is False exactly at the NaN positions.
    score = K.switch(K.equal(score, score), score, K.zeros_like(score))
    return K.mean(score)
def f1_loss(y_true, y_pred):
    """Differentiable "soft" F1 loss: ``1 - mean(F1)``.

    Unlike a thresholded F1 metric, the raw predicted scores are used when
    accumulating true positives, false positives and false negatives over
    axis 0, so the result stays differentiable with respect to ``y_pred``.

    Args:
        y_true: tensor of ground-truth indicators.
        y_pred: tensor of predicted scores, same shape as ``y_true``.

    Returns:
        Scalar tensor; lower is better.
    """
    tp = K.sum(K.cast(y_true*y_pred, 'float'), axis=0)
    fp = K.sum(K.cast((1-y_true)*y_pred, 'float'), axis=0)
    fn = K.sum(K.cast(y_true*(1-y_pred), 'float'), axis=0)

    p = tp / (tp + fp + K.epsilon())
    r = tp / (tp + fn + K.epsilon())

    score = 2*p*r / (p+r+K.epsilon())
    # Replace NaN entries by 0 using only the Keras backend (this notebook
    # never imports `tf`). NaN is the only value not equal to itself, so
    # equal(score, score) is False exactly at the NaN positions.
    score = K.switch(K.equal(score, score), score, K.zeros_like(score))
    return 1-K.mean(score)

In [13]:
# # create callbacks list
# from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau
# from sklearn.model_selection import train_test_split, KFold
# from keras.models import load_model
# from PIL import Image
# from matplotlib.image import imsave
# epochs = 10; batch_size = 64
# use_cache = False
# # split data into train, valid
# paths, labels = getTrainDataset()
# channels = ["green", "blue", "red", "yellow"]
# # divide to 
# keys = np.arange(paths.shape[0], dtype=np.int)  
# kf = KFold(n_splits=10)
# for i, (train_index, test_index) in enumerate(kf.split(keys)):
#     pathsTrain = paths[train_index]
#     labelsTrain = labels[train_index]
#     pathsVal = paths[test_index]
#     labelsVal = labels[test_index]

#     tg = ProteinDataGenerator(pathsTrain, labelsTrain, batch_size, SHAPE, channels, use_cache=use_cache, augmentor = False)
#     vg = ProteinDataGenerator(pathsVal, labelsVal, batch_size, SHAPE, channels, use_cache=use_cache, augmentor = False)
#     checkpoint = ModelCheckpoint('../vae/' + str(i) + '.h5', monitor='val_loss', verbose=1, 
#                              save_best_only=True, mode='min', save_weights_only = False)
#     model = vae_model(
#         input_shape=(SIZE,SIZE,len(channels)), 
#         n_out=28, channels = channels)
#     model.summary()
#     model.compile(
#     loss="mse", 
#     optimizer=Adam(1e-03)
#     )
#     hist =  model.fit_generator(
#         tg,
#         steps_per_epoch=np.ceil(float(len(pathsTrain)) / float(batch_size)),
#         validation_data=vg,
#         validation_steps=np.ceil(float(len(pathsVal)) / float(batch_size)),
#         epochs=10, 
#         verbose=1,
#         callbacks = [checkpoint])
#     model2 = Model(input=model.input, output=model.layers[7].output)
#     model2.save('../vae/vae' + str(i) + '.h5')
#     model3 = load_model('../vae/vae' + str(i) + '.h5')
    
#     for i in range(len(vg)):
#         images, _ = vg[i]
#         encodings = model3.predict(images)
#         for j, encoding in enumerate(encodings):
#             filename = pathsVal[(i*batch_size) + j].split('train/')[-1]
#             imsave("../vae/images/" + filename, encoding)

In [15]:
# create callbacks list
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau
from sklearn.model_selection import train_test_split, KFold
from keras.models import load_model
from PIL import Image
from matplotlib.image import imsave

In [1]:
# Train the autoencoder on the full training set, then export the encoder
# half and use it to write an encoded image to disk.
# NOTE(review): `epochs` is set to 25 here but fit_generator below is called
# with a literal epochs=10, so this value is currently not used.
epochs = 25
batch_size = 64
use_cache = False
# Load all training samples (no train/validation split in this cell).
paths, labels = getTrainDataset()
channels = ["green", "blue", "red", "yellow"]
# Sample indices. `np.int` was removed in NumPy 1.24; the builtin `int` is
# the equivalent dtype. (`keys` is not used further down in this cell.)
keys = np.arange(paths.shape[0], dtype=int)
tg = ProteinDataGenerator(paths, labels, batch_size, SHAPE, channels, use_cache=use_cache, augmentor = False)
# NOTE(review): this checkpoint monitors 'val_loss' but no validation data is
# supplied, and it is never passed to fit_generator via `callbacks`, so it
# has no effect as written.
checkpoint = ModelCheckpoint('../vae/' + 'fullset_vae' + '.h5', monitor='val_loss', verbose=1,
                         save_best_only=True, mode='min', save_weights_only = False)
model = vae_model(
    input_shape=(SIZE,SIZE,len(channels)),
    n_out=28, channels = channels)
model.summary()
model.compile(
    loss="mse",
    optimizer=Adam(1e-03)
)
hist =  model.fit_generator(
    tg,
    steps_per_epoch=np.ceil(float(len(paths)) / float(batch_size)),
    epochs=10,
    verbose=1,
    max_queue_size = 100,
#     use_multiprocessing = True
)
# Sub-model from the autoencoder input up to layer index 7 (presumably the
# `encoded` bottleneck of vae_model — verify with model.summary()).
# `inputs=` / `outputs=` are the Keras 2 keyword names; the singular
# `input=` / `output=` spellings are legacy.
model2 = Model(inputs=model.input, outputs=model.layers[7].output)
model2.save('../vae/vae' + 'fullset_vae_model' + '.h5')
model3 = load_model('../vae/vae' + 'fullset_vae_model' + '.h5')

# Encode and save images. The two `break`s stop after the first image of the
# first batch, so only a single encoding is written and displayed.
for i in range(len(tg)):
    images, _ = tg[i]
    encodings = model3.predict(images)
    for j, encoding in enumerate(encodings):
        # Recover the sample id from its path prefix; this index arithmetic
        # relies on the generator not shuffling.
        filename = paths[(i*batch_size) + j].split('train/')[-1]
        imsave("../vae/images/" + filename, encoding)
        plt.imshow(encoding)
        break
    break

Using TensorFlow backend.


KeyboardInterrupt: 

In [None]:
# model2 = Model(input=model.input, output=model.layers[7].output)
# model2.save('../vae/vae' + 'fullset_vae_model' + '.h5')

In [None]:
# Reload the saved sub-model (written by the `model2.save(...)` call with the same path).
model3 = load_model('../vae/vae' + 'fullset_vae_model' + '.h5')

In [None]:
# Run the loaded model over generator batches and save its output as an image.
# The two `break`s stop after the first image of the first batch, so only a
# single encoding is written and displayed.
for i in range(len(tg)):
    # The generator returns (X, X); only the inputs are needed here.
    images, _ = tg[i]
    encodings = model3.predict(images)
    for j, encoding in enumerate(encodings):
        # Recover the sample id from its path prefix; this index arithmetic
        # relies on the generator not shuffling.
        filename = paths[(i*batch_size) + j].split('train/')[-1]
        imsave("../vae/images/" + filename, encoding)
        plt.imshow(encoding)
        break
    break