In [None]:
%matplotlib inline
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot

In [None]:
import os
import shutil
# Show which datasets are attached under ../input.
print(os.listdir("../input"))

# Make sure the Keras dataset cache directory exists.  exist_ok=True keeps the
# cell re-runnable without a hand-written try/except.
os.makedirs('/tmp/.keras/datasets', exist_ok=True)

# Copy the attached pretrained weights next to the dataset cache, presumably so
# that Keras picks them up from /tmp/.keras instead of downloading them --
# TODO confirm.  copytree raises FileExistsError when the destination already
# exists; on a re-run the earlier copy is deliberately kept as is.
try:
    shutil.copytree("../input/keras-pretrained-models", "/tmp/.keras/models")
except FileExistsError:
    pass

In [None]:
import os.path
import itertools
from itertools import chain

import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn import preprocessing
from sklearn.decomposition import PCA
from sklearn import cluster, datasets, mixture
from sklearn.datasets import load_digits
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import f1_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import seaborn as sns

import tensorflow as tf

from keras.layers import Input, Embedding, LSTM, GRU, Dense, Dropout, Lambda, \
    Conv1D, Conv2D, Conv3D, \
    Conv2DTranspose, \
    AveragePooling1D, AveragePooling2D, \
    MaxPooling1D, MaxPooling2D, MaxPooling3D, \
    GlobalAveragePooling1D, GlobalAveragePooling2D, \
    GlobalMaxPooling1D, GlobalMaxPooling2D, GlobalMaxPooling3D, \
    LocallyConnected1D, LocallyConnected2D, \
    concatenate, Flatten, Average, Activation, \
    RepeatVector, Permute, Reshape, Dot, \
    multiply, dot, add, \
    PReLU, \
    Bidirectional, TimeDistributed, \
    SpatialDropout1D, \
    BatchNormalization
from keras.models import Model, Sequential
from keras import losses
from keras.callbacks import BaseLogger, ProgbarLogger, Callback, History
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, ReduceLROnPlateau
from keras.wrappers.scikit_learn import KerasClassifier
from keras import regularizers
from keras import initializers
from keras.metrics import categorical_accuracy
from keras.constraints import maxnorm, non_neg
from keras.optimizers import RMSprop, Adam
from keras.utils import to_categorical, plot_model, Sequence
from keras.preprocessing.image import ImageDataGenerator
from keras import backend as K

In [None]:
from PIL import Image
from zipfile import ZipFile
import h5py
import cv2
from tqdm import tqdm

In [None]:
ls -la ../input

In [None]:
# Input locations used below: `src_dir` holds the CSV files (train.csv,
# sample_submission.csv); the two `ziped_*` directories hold the .npz archives
# with the image arrays (judging by the directory names, images resized to
# 560x420 and stored as float16).
src_dir = '../input/aptos2019-blindness-detection'
ziped_src_dir_train = '../input/aptos2019-resize560x420-float16-train'
ziped_src_dir_test = '../input/aptos2019-resize560x420-float16-test'

In [None]:
train_labels = pd.read_csv(os.path.join(src_dir, "train.csv"))
print(train_labels.shape)
train_labels.head(10)

In [None]:
y_train = train_labels.diagnosis.values
y_train

In [None]:
y_cat_train = to_categorical(y_train)
y_cat_train

In [None]:
test_labels = pd.read_csv(os.path.join(src_dir, "sample_submission.csv"))
print(test_labels.shape)
test_labels.head()

### load float16 image data

In [None]:
x_train_img = np.load(os.path.join(ziped_src_dir_train, 'train_img_float16.npz'))['x']
x_train_img.shape, x_train_img.min(), x_train_img.max()

In [None]:
x_test_img = np.load(os.path.join(ziped_src_dir_test, 'test_img_float16.npz'))['x']
x_test_img.shape, x_test_img.min(), x_test_img.max()

In [None]:
plt.imshow(x_train_img[0].astype('float32'))

In [None]:
# Lookup table: training id_code -> row index of train_labels.
dic_train = {
    id_code: row_index
    for id_code, row_index in zip(train_labels['id_code'].tolist(),
                                  train_labels.index.tolist())
}
dic_train

In [None]:
# Lookup table: test id_code -> row index of test_labels.
dic_test = {
    id_code: row_index
    for id_code, row_index in zip(test_labels['id_code'].tolist(),
                                  test_labels.index.tolist())
}
dic_test

In [None]:
def get_arr0(Id, test=False):
    """Return the stored image for ``Id`` as a float32 array.

    Parameters
    ----------
    Id
        Key looked up in ``dic_test`` (when ``test`` is truthy) or in
        ``dic_train`` (otherwise) to obtain the image's position.
    test : bool, default False
        Select ``x_test_img`` / ``dic_test`` instead of
        ``x_train_img`` / ``dic_train``.

    Returns
    -------
    The entry of the selected image array at the looked-up position,
    converted with ``astype('float32')``.
    """
    # Only the selected pair of module-level objects is touched.
    images, position_of = (x_test_img, dic_test) if test else (x_train_img, dic_train)
    return images[position_of[Id]].astype('float32')

arr0 = get_arr0('0083ee8054ee')
print(arr0.shape)
plt.imshow(arr0)

In [None]:
arr0 = get_arr0('006efc72b638', test=True)
print(arr0.shape)
plt.imshow(arr0)

In [None]:
# Augmentation settings: random rotations (rotation_range=10) and random
# horizontal/vertical flips are switched on; every normalisation, shift, shear,
# zoom and channel-shift option below is set to a disabling value
# (False / 0 / None).
datagen = ImageDataGenerator(
    # set input mean to 0 over the dataset
    featurewise_center=False,
    # set each sample mean to 0
    samplewise_center=False,
    # divide inputs by std of dataset
    featurewise_std_normalization=False,
    # divide each input by its std
    samplewise_std_normalization=False,
    # apply ZCA whitening
    zca_whitening=False,
    # epsilon for ZCA whitening
    zca_epsilon=1e-06,
    # degree range for random rotations
    rotation_range=10,
    # randomly shift images horizontally
    width_shift_range=0.0,
    # randomly shift images vertically
    height_shift_range=0.0,
    # set range for random shear
    shear_range=0.,
    # set range for random zoom
    zoom_range=0.,
    # set range for random channel shifts
    channel_shift_range=0.,
    # set mode for filling points outside the input boundaries
    fill_mode='nearest',
    # value used for fill_mode = "constant"
    cval=0.,
    # randomly flip images horizontally
    horizontal_flip=True,
    # randomly flip images vertically
    vertical_flip=True,
    # set rescaling factor (applied before any other transformation)
    rescale=None,
    # set function that will be applied on each input
    preprocessing_function=None,
    # image data format, either "channels_first" or "channels_last"
    data_format=None,
    # fraction of images reserved for validation (strictly between 0 and 1)
    validation_split=0.0)

In [None]:
arr1 = datagen.random_transform(arr0)
print(arr1.shape)
plt.imshow(arr1)

In [None]:
import random

class Seq(object):
    """Endless batch iterator over the images listed in a label table.

    Every batch is a tuple ``(x, y)``: ``x`` stacks the image arrays of the
    batch, ``y`` stacks their 5-class one-hot targets.  Iterating with
    ``next()`` never ends on its own: after the last batch the iterator
    rewinds to the first one.

    The class relies on module-level names defined elsewhere in the notebook:
    ``get_arr0`` (image lookup), ``datagen`` (augmentation) and
    ``to_categorical``.

    Parameters
    ----------
    df
        Table with an ``id_code`` column and, optionally, a ``diagnosis``
        column holding the targets.
    aug : bool, default False
        Pass every image through ``random_transform`` before batching.
    test : bool, default False
        Forwarded to ``get_arr0`` to select the test images.
    batch_size : int, default 32
        Number of ids per batch; the last batch may be shorter.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.
    """

    def __init__(self, df, aug=False, test=False, batch_size=32):
        if batch_size < 1:
            raise ValueError('batch_size must be a positive integer')

        # Shuffling is not implemented; initialize_it() raises when this is
        # set to a truthy value.
        self.shuffle = None
        self.aug = aug
        self.test = test
        self.batch_size = batch_size
        self.df = df

        self.ids = self.df.id_code.tolist()

        # Batches per pass: ceil(len(ids) / batch_size); 0 for an empty table.
        self.len = -(-len(self.ids) // self.batch_size)

        self.initialize_it()

    def initialize_it(self):
        """Rewind the batch cursor to the first batch.

        Raises NotImplementedError when ``self.shuffle`` is truthy.
        """
        if self.shuffle:
            # not implemented yet
            raise NotImplementedError

        self.it = iter(range(0, len(self.ids), self.batch_size))
        # None marks "no batch available" (empty id list).
        self.idx_next = next(self.it, None)

    def __len__(self):
        """Number of batches in one pass over the ids."""
        return self.len

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next ``(x, y)`` batch, wrapping around after the last.

        Raises StopIteration only when there are no ids at all.
        """
        if self.idx_next is None:
            raise StopIteration

        start = self.idx_next
        # Slicing clamps the upper bound, so the last batch is simply shorter.
        self.ids_part = self.ids[start:start + self.batch_size]
        batch = self.getpart(self.ids_part)

        self.idx_next = next(self.it, None)
        if self.idx_next is None:
            # End of the pass: start over so the iterator is endless.
            self.initialize_it()
        return batch

    def __getitem__(self, id0):
        """Return ``(image, one_hot_target)`` for the id code ``id0``."""
        arr, tgts = self.get_data(id0)
        cat = self.convert_tgts(tgts)
        return arr, cat

    def random_transform(self, arr):
        """Apply the module-level ``datagen`` random transform to ``arr``."""
        return datagen.random_transform(arr)

    def convert_tgts(self, tgts):
        """One-hot encode ``tgts`` into 5 classes.

        If ``to_categorical`` raises TypeError -- presumably the case for the
        ``None`` target that ``get_data`` returns when there is no
        ``diagnosis`` column -- an all-zero vector of length 5 is returned.
        """
        try:
            cat = to_categorical(tgts, num_classes=5)
        except TypeError:
            cat = np.zeros((5,))
        return cat

    def get_data(self, id0):
        """Return ``(image, target)`` for ``id0``.

        The target is the first ``diagnosis`` value of the rows whose
        ``id_code`` equals ``id0``, or ``None`` when the table has no
        ``diagnosis`` attribute.
        """
        arr = get_arr0(id0, test=self.test)

        try:
            y = (self.df.diagnosis[self.df.id_code == id0]).tolist()[0]
        except AttributeError:
            y = None
        return arr, y

    def getpart(self, ids):
        """Build one batch ``(x, y)`` for the given id codes.

        ``np.stack`` raises ValueError when ``ids`` is empty.
        """
        xs = []
        ys = []
        for id0 in ids:
            self.extend_data(id0, xs, ys)

        return (np.stack(xs), np.stack(ys))

    def extend_data(self, id0, xs, ys):
        """Append the (optionally augmented) image and target of ``id0``
        to the lists ``xs`` and ``ys`` in place."""
        arr0, cat = self[id0]
        img0 = self.random_transform(arr0) if self.aug else arr0
        xs.append(img0)
        ys.append(cat)

In [None]:
seq = Seq(train_labels, aug=True, batch_size=8)
print(len(seq))

In [None]:
seq['0083ee8054ee']

In [None]:
seq.get_data('0083ee8054ee')

In [None]:
x, y = next(seq)
x.shape, y.shape

In [None]:
plt.imshow(x[0])

In [None]:
'test image'
seq_test = Seq(test_labels, aug=False, batch_size=8, test=True)
print(len(seq_test))
seq_test.get_data('006efc72b638')

In [None]:
class ImageGenerator(Sequence):
    """Index-addressable batch source built on top of a ``Seq`` object.

    Batch ``idx`` covers the ids ``seq.ids[idx*bs : idx*bs + bs]`` (with
    ``bs = seq.batch_size``) and is materialised through ``seq.getpart``.
    """

    def __init__(self, seq):
        self.seq = seq

    def __len__(self):
        """Number of batches, as reported by the wrapped ``seq``."""
        return len(self.seq)

    def __getitem__(self, idx):
        """Return the ``(x, y)`` batch with index ``idx``."""
        batch_x, batch_y = self.seq.getpart(self.get_ids(idx))
        return batch_x, batch_y

    def get_ids(self, idx):
        """Return the id codes that belong to batch ``idx``."""
        all_ids = self.seq.ids
        size = self.seq.batch_size
        first = idx * size
        # The upper bound never exceeds the number of ids.
        last = min(first + size, len(all_ids))
        return all_ids[first:last]

In [None]:
img_gen = ImageGenerator(seq)
img_gen

In [None]:
img_gen.get_ids(457)

In [None]:
x, y = img_gen[0]
x.shape, y.shape

In [None]:
y

### make model

In [None]:
from keras import applications
IMG_DIM0 = (4, 210, 280, 3)
IMG_DIM = (420, 560, 3)

In [None]:
def make_trainable_false(model_resnet, trainable=False):
    """Assign ``trainable`` to the ``trainable`` flag of every layer.

    Parameters
    ----------
    model_resnet
        Object exposing an iterable ``layers`` attribute.
    trainable : bool, default False
        Value written to each layer's ``trainable`` attribute.

    Returns
    -------
    None; the layers are modified in place.
    """
    for layer in model_resnet.layers:
        layer.trainable = trainable

class TrainableCtrl(object):
    """Freeze and restore the per-layer ``trainable`` flags of three models.

    The models are addressed by the keys ``'cnvt'``, ``'resnet'`` and
    ``'classifier'``.  The flags of all layers are recorded once at
    construction time; ``set_trainable_true`` restores that snapshot.
    """

    def __init__(self, model_cnvt,
                       model_resnet,
                       model_classifier):
        self.model_cnvt = model_cnvt
        self.model_resnet = model_resnet
        self.model_classifier = model_classifier

        self.model_dic = dict(
            cnvt=self.model_cnvt,
            resnet=self.model_resnet,
            classifier=self.model_classifier,
        )
        self.trainable_dic = {}

        self.get_trainable()

    def get_trainable(self):
        """Record the current ``trainable`` flag of every layer, per model key."""
        for key, model in self.model_dic.items():
            self.trainable_dic[key] = [layer.trainable for layer in model.layers]

    def set_trainable_false(self, model_key):
        """Mark every layer of the model stored under ``model_key`` as not trainable."""
        make_trainable_false(self.model_dic[model_key])

    def set_trainable_true(self, model_key):
        """Restore the recorded ``trainable`` flags of the model under ``model_key``."""
        for position, layer in enumerate(self.model_dic[model_key].layers):
            layer.trainable = self.trainable_dic[model_key][position]

In [None]:
def make_model_cnvt(img_dim=IMG_DIM):
    """Build the input-conversion model named ``'model_cnvt'``.

    The model applies a 1x1 convolution with 3 filters (stride 1, 'same'
    padding, tanh activation) to an input of shape ``img_dim`` and passes
    the result through a linear ``Activation`` layer.

    Parameters
    ----------
    img_dim : tuple, default ``IMG_DIM``
        Shape of one input image.

    Returns
    -------
    The Keras ``Model`` mapping the input to the converted image.
    """
    image_in = Input(shape=img_dim)
    pointwise_conv = Conv2D(3,
                            kernel_size=1,
                            strides=1,
                            padding='same',
                            activation='tanh')
    converted = Activation('linear')(pointwise_conv(image_in))
    return Model(image_in, converted, name='model_cnvt')
# def make_model_cnvt(img_dim=IMG_DIM0):
#     '''==============================
#     inputs
#     =============================='''
#     inp = Input(shape=img_dim)
#     oup = Activation('linear')(inp)
#     model_cnvt = Model(inp, oup, name='model_cnvt')
#     return model_cnvt

model_cnvt = make_model_cnvt()
model_cnvt.summary()

In [None]:
model_resnet = applications.resnet50.ResNet50(
    include_top=False,
    weights='imagenet',
    input_tensor=None,
    input_shape=IMG_DIM,
    pooling=None,
    classes=None)

In [None]:
model_resnet.summary()

In [None]:
def make_model_classifier(input_dim=2048):
    """Build the classification head named ``'classifier'``.

    Architecture: input vector of length ``input_dim`` -> Dense(1024, elu)
    -> Dense(5) -> softmax.

    Parameters
    ----------
    input_dim : int, default 2048
        Length of the feature vector fed to the head.

    Returns
    -------
    The Keras ``Model`` mapping features to 5 class probabilities.
    """
    features = Input((input_dim,))
    hidden = Dense(1024, activation='elu')(features)
    logits = Dense(5)(hidden)
    probabilities = Activation('softmax')(logits)
    return Model(features, probabilities, name='classifier')

model_classifier = make_model_classifier()
model_classifier.summary()

In [None]:
model_classifier.predict(np.ones((2,2048)))

In [None]:
K.clear_session()
model_cnvt = make_model_cnvt()
model_classifier = make_model_classifier()
model_resnet = applications.resnet50.ResNet50(
    include_top=False,
    weights='imagenet',
    input_tensor=None,
    input_shape=IMG_DIM,
    pooling=None,
    classes=None)

def make_model(model_cnvt, model_resnet, model_classifier):
    """Chain the three sub-models into the end-to-end model ``'model0'``.

    Data flow: input of shape ``IMG_DIM`` -> ``model_cnvt`` ->
    ``model_resnet`` -> global average pooling -> ``model_classifier`` ->
    linear activation layer named ``'path_cls_cls'``.

    Returns
    -------
    dict
        The three sub-models under ``'model_classifier'``, ``'model_resnet'``
        and ``'model_cnvt'``, plus the assembled model under both
        ``'model'`` and ``'model0'`` (the same object).
    """
    image_in = Input(shape=IMG_DIM, name='input0')
    feature_maps = model_resnet(model_cnvt(image_in))
    pooled = GlobalAveragePooling2D()(feature_maps)
    class_probs = model_classifier(pooled)
    class_probs = Activation('linear', name='path_cls_cls')(class_probs)
    assembled = Model(image_in, class_probs, name='model0')

    return dict(
        model_classifier=model_classifier,
        model_resnet=model_resnet,
        model_cnvt=model_cnvt,
        model=assembled,
        model0=assembled,
    )

models = make_model(model_cnvt, model_resnet, model_classifier)
models['model'].summary()

In [None]:
train_ctrl = TrainableCtrl(model_cnvt, model_resnet, model_classifier)

In [None]:
# Stage 1: set trainable=False on every layer of model_resnet, then compile
# the full model with Adam(0.0001) and print its summary.
'''
model_resnet : not train
'''
train_ctrl.set_trainable_false('resnet')

models['model'].compile(loss='categorical_crossentropy',
                        optimizer=Adam(0.0001),
                        metrics=['categorical_accuracy'])

models['model'].summary()

In [None]:
seq = Seq(train_labels, aug=True, batch_size=16)
print(len(seq))
img_gen = ImageGenerator(seq)

hst = models['model'].fit_generator(img_gen, epochs=2,
                                    steps_per_epoch=len(img_gen),
                                    use_multiprocessing=False, workers=5)
# hst = models['model'].fit_generator(img_gen, epochs=4,
#                                     steps_per_epoch=len(img_gen),
#                                     use_multiprocessing=False, workers=5,
#                                     callbacks=callbacks)

In [None]:
seq_pred = Seq(train_labels, aug=False, batch_size=32)
print(len(seq_pred))
img_gen_pred = ImageGenerator(seq_pred)

In [None]:
pred = models['model'].predict_generator(
    img_gen_pred,
    steps=len(img_gen_pred),
    verbose=1, workers=5)
pred.shape

In [None]:
pred

In [None]:
y_train

In [None]:
print(f1_score(y_train, np.argmax(pred, axis=1), average='macro'))
print(classification_report(y_train, np.argmax(pred, axis=1)))
confusion_matrix(y_train, np.argmax(pred, axis=1))

In [None]:
# Stage 2: restore the trainable flags of the model_resnet layers to the values
# recorded when train_ctrl was created, then compile again (this time with the
# 'adam' optimizer given by name) and print the summary.
train_ctrl.set_trainable_true('resnet')

models['model'].compile(loss='categorical_crossentropy',
                        optimizer='adam',
                        metrics=['categorical_accuracy'])

models['model'].summary()

In [None]:
def lr_schedule(epoch):
    """Return the learning rate for ``epoch`` and print it.

    The rate follows a cycle of four epochs: 0.0005 at the start of each
    cycle, then halved on each of the next three epochs (1/2, 1/4, 1/8 of
    the base rate) before jumping back to 0.0005.

    Parameters
    ----------
    epoch : int
        Epoch index.

    Returns
    -------
    float
        Learning rate for that epoch.
    """
    # Multiplier per position inside the four-epoch cycle; position 0 (and
    # anything unlisted) keeps the base rate.
    scale_by_phase = {1: 1/2, 2: 1/4, 3: 1/8}
    lr = 0.0005 * scale_by_phase.get(epoch % 4, 1)
    print('Learning rate: ', lr)
    return lr

lr_scheduler = LearningRateScheduler(lr_schedule)
callbacks = [lr_scheduler]

In [None]:
seq = Seq(train_labels, aug=True, batch_size=16)
print(len(seq))
img_gen = ImageGenerator(seq)

# hst = models['model'].fit_generator(img_gen, epochs=4,
#                                     steps_per_epoch=len(img_gen),
#                                     use_multiprocessing=False, workers=5)
hst = models['model'].fit_generator(img_gen, epochs=4,
                                    steps_per_epoch=len(img_gen),
                                    use_multiprocessing=False, workers=5,
                                    callbacks=callbacks)

In [None]:
seq_pred = Seq(train_labels, aug=False, batch_size=32)
print(len(seq_pred))
img_gen_pred = ImageGenerator(seq_pred)

In [None]:
pred = models['model'].predict_generator(
    img_gen_pred,
    steps=len(img_gen_pred),
    verbose=1, workers=5)
pred.shape

In [None]:
pred

In [None]:
y_train

In [None]:
print(f1_score(y_train, np.argmax(pred, axis=1), average='macro'))
print(classification_report(y_train, np.argmax(pred, axis=1)))
confusion_matrix(y_train, np.argmax(pred, axis=1))

In [None]:
seq_test = Seq(test_labels, test=True, aug=False, batch_size=32)
seq_test
print(len(seq_test))
img_gen_pred_test = ImageGenerator(seq_test)

In [None]:
pred_test = models['model'].predict_generator(
    img_gen_pred_test,
    steps=len(img_gen_pred_test),
    verbose=1, workers=3)
pred_test.shape

In [None]:
np.argmax(pred_test, axis=1)

In [None]:
submission = test_labels.copy()
submission['diagnosis'] = np.argmax(pred_test, axis=1)
submission.head()

In [None]:
submission.to_csv('submission.csv', index=False)