In [None]:
import os
# Show the entries of ../input, the directory the dataset and weight paths below point into.
print(os.listdir('../input'))

**Setup**

In [None]:
import pandas as pd
import numpy as np
import cv2
from tqdm import tqdm
import matplotlib.pyplot as plt
from PIL import Image
from zipfile import ZipFile
import h5py

In [None]:
import tensorflow as tf
from keras import backend as K
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split

In [None]:
from keras import applications
from keras.layers import Input, Conv2D, Reshape, Dense, Activation
from keras import regularizers
from keras.models import Model, Sequential
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, ReduceLROnPlateau
from keras.utils import to_categorical

In [None]:
# Dataset root: train.csv, sample_submission.csv and the train/ and test/
# image folders are all read relative to this path.
src_dir = '../input/human-protein-atlas-image-classification'
# Size handed to cv2.resize in get_img for every loaded image.
SHAPE = (256, 256)

In [None]:
def overSampling(train_df):
    """Return a copy of ``train_df`` with rows of selected classes duplicated.

    ``Target`` is expected to hold space-separated class ids as a string.
    For every entry of ``lows`` each row whose ``Target`` contains that id as
    a whole token is appended once more, so an id listed k times in ``lows``
    yields k extra copies of its rows.

    Parameters
    ----------
    train_df : pandas.DataFrame
        Frame with a string column ``Target``. It is not modified.

    Returns
    -------
    pandas.DataFrame
        The original rows followed by the duplicated rows, with a fresh
        0..n-1 index.
    """
    lows = [15,15,15,8,9,10,8,9,10,8,9,10,17,20,24,26,15,27,15,20,24,17,8,15,27,27,27]
    targets = train_df['Target']

    # Collect every piece and concatenate once at the end; concatenating
    # inside the loop re-copies the growing frame on every iteration.
    pieces = [train_df]
    for i in lows:
        target = str(i)
        # Four separate whole-token matches, kept in this order so the
        # resulting row order is stable: only label, first, last, middle.
        masks = (
            targets == target,
            targets.str.startswith(target + " "),
            targets.str.endswith(" " + target),
            targets.str.contains(" " + target + " ", regex=False),
        )
        pieces.extend(train_df.loc[mask] for mask in masks)
    return pd.concat(pieces, ignore_index=True)

In [None]:
# Raw training labels; the Id and Target columns are used further down.
train_df = pd.read_csv(os.path.join(src_dir, "train.csv"))
# Training labels with the rows of the classes listed in overSampling duplicated.
train_labels = overSampling(train_df)
# Alternative without oversampling:
# train_labels= pd.read_csv(os.path.join(src_dir, "train.csv"))
# The sample submission supplies the test Ids (and the frame later filled with predictions).
test_labels = pd.read_csv(os.path.join(src_dir, "sample_submission.csv"))

In [None]:
# Plain Python list of the training image Ids (used below to pick a sample image).
ID_LIST_TRAIN = train_labels['Id'].tolist()

In [None]:
# Split the RAW rows first and oversample only the training part. Splitting
# the already-oversampled frame would put copies of the same image into both
# the training and the validation set, making validation scores optimistic.
SPLIT_SEED = 42  # fixed seed so the split is reproducible across runs
train_split, val_labels = train_test_split(train_df, test_size=0.1, random_state=SPLIT_SEED)
train_labels = overSampling(train_split)

In [None]:
# (rows, columns) of the training part of the split.
train_labels.shape

In [None]:
# (rows, columns) of the validation part of the split.
val_labels.shape

In [None]:
def get_img(Id, test=False):
    """Load the four colour-channel PNGs of one sample as a single array.

    Reads ``<src_dir>/<train|test>/<Id>_<color>.png`` for green, blue, red
    and yellow (in that order) as grayscale images, stacks them on the last
    axis, resizes to ``SHAPE`` and scales the values by 1/255.

    Parameters
    ----------
    Id : str
        Sample identifier (file-name prefix).
    test : bool
        Read from the ``test`` folder instead of ``train``.

    Returns
    -------
    numpy.ndarray
        float32 array with the four channels on the last axis.

    Raises
    ------
    FileNotFoundError
        If any channel file is missing or cannot be decoded.
    """
    colors = ['green', 'blue', 'red', 'yellow']
    tgt = 'test' if test else 'train'

    channels = []
    for color in colors:
        path = os.path.join(src_dir, tgt, Id + '_' + color + '.png')
        channel = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than later in np.stack.
        if channel is None:
            raise FileNotFoundError('could not read image channel: ' + path)
        channels.append(channel)

    img = np.stack(channels, axis=-1)
    img = cv2.resize(img, SHAPE).astype('float32')
    return np.divide(img, 255)

In [None]:
def show_img(arr, nrows = 1, ncols = 4, figsize=(15, 5)):
    """Plot the channels of ``arr`` side by side, one colormap per channel.

    Channel 0 is drawn with 'Greens', 1 with 'Blues', 2 with 'Reds' and any
    further channel with 'Oranges'. Axes for which ``arr`` has no channel
    are left empty.

    Parameters
    ----------
    arr : numpy.ndarray
        Image array with channels on the last axis (H, W, C).
    nrows, ncols : int
        Grid passed to ``plt.subplots``; the first ``ncols`` axes of the
        grid (row-major) are used.
    figsize : tuple
        Figure size passed to ``plt.subplots``.
    """
    # squeeze=False always yields a 2-D axes array, so indexing works for any
    # nrows/ncols (including a single axis).
    fig, subs = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize, squeeze=False)
    axes = subs.ravel()
    cmaps = ('Greens', 'Blues', 'Reds')
    n_channels = arr.shape[2] if arr.ndim == 3 else 0

    # Explicit bound instead of a bare `except: pass`: missing channels are
    # skipped, but genuine plotting errors are no longer hidden.
    for ii in range(min(ncols, n_channels)):
        cp = cmaps[ii] if ii < len(cmaps) else 'Oranges'
        axes[ii].imshow(arr[:, :, ii], cmap=cp)

In [None]:
# Visual sanity check: load the second training Id and plot its four channels.
show_img(get_img(ID_LIST_TRAIN[1]))

In [None]:
# Per-class 0/1 ground-truth vectors over the rows of train_labels:
# y_cat_train_dic[c][i] == 1 iff class id c is one of the labels of row i.
# The label strings are parsed once up front instead of once per class.
_train_label_sets = [set(ee.split()) for ee in train_labels.Target.tolist()]
y_cat_train_dic = {}
for icat in range(28):
    target = str(icat)
    y_cat_train_dic[icat] = np.array([int(target in labels) for labels in _train_label_sets])

# Create DataGenerator

In [None]:
import random

class Seq(object):
    """Endless batch iterator over the rows of a label DataFrame.

    Each call to ``next()`` returns one ``(inputs, targets)`` pair of dicts
    built from ``batch_size`` consecutive Ids of ``df`` (the last batch of a
    pass may be shorter). After the last batch the iterator restarts from
    the first Id, so it never raises ``StopIteration``. ``len()`` gives the
    number of batches in one pass.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have an ``Id`` column. If it also has a ``Target`` column of
        space-separated class ids, those become the multi-hot targets;
        otherwise all-zero targets are produced.
    extend : bool
        Emit several copies per sample (see ``extend_data``).
    aug : bool
        Apply ``random_transform`` to every emitted image.
    test : bool
        Forwarded to ``get_img`` to select the image folder.
    batch_size : int
        Number of Ids per batch.

    Raises
    ------
    ValueError
        If ``df`` has no rows.
    """
    sections = None  # default for split(); captured when the class body runs
    index = None     # not used by any method of this class

    def __init__(self, df, extend=False, aug=False, test=False, batch_size=32):
        self.shaffle = None  # (sic) shuffling is not implemented, see initialize_it
        self.extend = extend
        self.aug = aug
        self.test = test
        self.batch_size = batch_size
        self.df = df

        self.ids = self.df.Id.tolist()
        if not self.ids:
            # Previously surfaced as a bare StopIteration from initialize_it.
            raise ValueError('Seq needs a DataFrame with at least one row')

        # Kept for backward compatibility; random_transform no longer needs it.
        self.reversed = sorted(range(SHAPE[0]), reverse=True)

        # Id -> Target lookup built once, replacing a full boolean-mask scan
        # of the frame for every sample. The first occurrence of an Id wins,
        # matching the old `df.Target[df.Id == id0].tolist()[0]`.
        if 'Target' in self.df.columns:
            self._target_by_id = {}
            for id0, tgt in zip(self.ids, self.df.Target.tolist()):
                self._target_by_id.setdefault(id0, tgt)
        else:
            self._target_by_id = None

        # Batches per pass: ceil(len(ids) / batch_size).
        self.len = -(-len(self.ids) // self.batch_size)
        self.initialize_it()

    def initialize_it(self):
        """(Re)start the pass: reset the batch-offset iterator and fetch the first offset."""
        if self.shaffle:
            # not implemented yet
            raise NotImplementedError
            #random.seed(self.state)
            #random.shuffle(self.ids)

        self.it = iter(range(0, len(self.ids), self.batch_size))
        self.idx_next = next(self.it)

    def __len__(self):
        return self.len

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next batch and advance; wraps around at the end of a pass."""
        idx = self.idx_next
        # Slicing clamps at the end of the list, so the last batch may be short.
        self.ids_part = self.ids[idx:idx + self.batch_size]
        res = self.getpart(self.ids_part)
        try:
            self.idx_next = next(self.it)
        except StopIteration:
            self.initialize_it()
        return res

    def __getitem__(self, id0):
        """Return ``(image, multi-hot target)`` for the sample *Id* ``id0`` (not a position)."""
        arr, tgts = self.get_data(id0)
        cat = self.convert_tgts(tgts)
        return arr, cat

    k_list = list(range(4))
    def random_transform(self, arr):
        """Return ``arr`` rotated by a random multiple of 90 degrees and randomly flipped.

        The flips reverse axis 0 and axis 1 independently, each with
        probability 1/2. ``arr`` is expected to be 3-D (H, W, C).
        """
        k = random.choice(self.k_list)
        arr0 = np.rot90(arr, k=k)
        # Reversing slices flip arrays of any size; the previous index list
        # only matched images whose side equals SHAPE[0].
        if random.randint(0,1):
            arr0 = arr0[::-1,:,:]
        if random.randint(0,1):
            arr0 = arr0[:,::-1,:]
        return arr0

    def convert_tgts(self, tgts):
        """Turn a list of class ids into a length-28 multi-hot vector (zeros for ``None``)."""
        if tgts is None:
            return np.zeros((28,))
        try:
            cats = to_categorical(tgts, num_classes=28)
            cat = cats.sum(axis=0)
        except TypeError:
            cat = np.zeros((28,))
        return cat

    def get_data(self, id0):
        """Return ``(image, labels)`` for ``id0``.

        ``labels`` is a list of ints parsed from the sample's ``Target``
        string, or ``None`` when the frame has no ``Target`` column or the
        value is not a string.
        """
        arr = get_img(id0, test=self.test)

        if self._target_by_id is None:
            return arr, None
        try:
            y = [int(ee) for ee in self._target_by_id[id0].split()]
        except AttributeError:
            # Target is not a string (no .split), e.g. a missing value.
            y = None
        return arr, y

    def getpart(self, ids):
        """Assemble one batch for ``ids``.

        Returns ``(x_ret, y_ret)``: dicts keyed by layer name. ``'input'``
        holds the flattened images and the label-valued entries hold the
        multi-hot targets; the remaining ``y_ret`` entries are zero
        placeholders of shape (batch, 1).
        """
        xs = []
        ys = []
        for id0 in ids:
            self.extend_data(id0, xs, ys)

        x = np.stack(xs)
        y = np.stack(ys)
        x_dummy = np.zeros((len(x), 1))
        x_ret = {
            'input': x,
            'input_cls': y,
        }
        y_ret = {
            'path_fit_cls_img': x_dummy,
            'path_cls_img_cls': y,
            'path_fit_imgA': x_dummy,
            'path_fit_img_cls_img': x_dummy,
            'path_cls_cls': y,
            'path_img_cls': y,
            'path_img_img_cls': y,
            'path_fit_cls_img_imgE': x_dummy,
        }
        return (x_ret, y_ret)

    def split(self, arr, sections=sections):
        """Cut ``arr`` into a ``sections`` x ``sections`` grid of tiles, returned row-major.

        ``sections`` must be given explicitly: its default is ``None``,
        which ``np.vsplit`` does not accept.
        """
        # Imported here because the notebook never imports itertools.chain.
        from itertools import chain
        res0 = np.vsplit(arr, sections)
        res = [np.hsplit(ee, sections) for ee in res0]
        res = list(chain.from_iterable(res))
        return res

    def extend_data(self, id0, xs, ys):
        """Append the flattened image(s) and target(s) of ``id0`` to ``xs`` / ``ys``."""
        arr0, cat = self[id0]

        if self.extend:
            # NOTE(review): `up_sample2` is not defined in the visible
            # notebook, so extend=True raises NameError unless it is
            # provided elsewhere. It is indexed with `cat == 1`, so it is
            # presumably a per-class array of copy counts — confirm.
            copies = int(up_sample2[cat==1].max())
        else:
            copies = 1

        for _ in range(copies):
            img = self.random_transform(arr0) if self.aug else arr0
            xs.append(img.flatten())
            ys.append(cat)

# Make Model

In [None]:
# InceptionResNetV2 backbone without its classification head
# (include_top=False); pooling='avg' asks for a global-average-pooled
# feature vector as output. It takes 3-channel 256x256 inputs, which the
# conversion model built below supplies from the 4-channel images.
model_resnet = applications.inception_resnet_v2.InceptionResNetV2(
    include_top=False,
    weights='imagenet',
    input_tensor=None,
    input_shape=(256,256,3),
    pooling='avg',
    classes=None)

In [None]:
# Replace the imagenet initialisation with backbone weights saved by an earlier run.
model_resnet.load_weights('../input/keras-inceptionresnetv2-resize139x139-005focal/model_5_resnet.h5')

In [None]:
def make_trainable_false(model_resnet, trainable=False):
    """Set the ``trainable`` flag of every layer in ``model_resnet.layers``.

    Despite the name, the flag is set to whatever ``trainable`` is
    (``False`` by default). Returns ``None``; the layers are changed in place.
    """
    for layer in model_resnet.layers:
        setattr(layer, 'trainable', trainable)

In [None]:
# Shape of one unflattened sample: 256 x 256 pixels, 4 channels.
img_shape = (256, 256, 4)
# Length of the flattened sample vector the model takes as input.
img_dim = np.prod(np.array(img_shape))
print(img_dim)

In [None]:
def make_model_cnvt(img_dim, img_shape):
    """Build the 'model_cnvt' sub-model: flat vector -> image -> 3-channel map.

    The flat input of length ``img_dim`` is reshaped to ``img_shape`` and
    passed through a 1x1 convolution with 3 filters, tanh activation and an
    L2 kernel penalty of 1e-4.

    Returns
    -------
    keras.models.Model
        Uncompiled model named ``'model_cnvt'``.
    """
    flat_input = Input(shape=(img_dim,))
    image = Reshape(img_shape)(flat_input)
    to_three_channels = Conv2D(
        3,
        kernel_size=1,
        strides=1,
        padding='same',
        activation='tanh',
        kernel_regularizer=regularizers.l2(1e-4),
    )
    return Model(flat_input, to_three_channels(image), name='model_cnvt')

In [None]:
# Build the channel-conversion model and restore its weights from an earlier run.
model_cnvt = make_model_cnvt(img_dim, img_shape)
model_cnvt.load_weights('../input/keras-inceptionresnetv2-resize139x139-005focal/model_5_cnvt.h5')

In [None]:
def make_model_classifier(input_dim=1536):
    """Build the 'classifier' head: a 28-unit dense layer followed by a sigmoid.

    Parameters
    ----------
    input_dim : int
        Length of the input feature vector.

    Returns
    -------
    keras.models.Model
        Uncompiled model named ``'classifier'`` producing 28 values in (0, 1).
    """
    features = Input((input_dim,))
    scores = Dense(28)(features)
    probabilities = Activation('sigmoid')(scores)
    return Model(features, probabilities, name='classifier')

In [None]:
# Build the classification head and restore its weights from an earlier run.
model_classifier = make_model_classifier()
model_classifier.load_weights('../input/keras-inceptionresnetv2-resize139x139-005focal/model_5_classifier.h5')

In [None]:
def make_model(img_dim, model_cnvt, model_resnet, model_classifier):
    """Chain the three sub-models into one end-to-end model and compile it.

    The flat input (layer name ``'input'``) goes through ``model_cnvt``,
    ``model_resnet`` and ``model_classifier`` in that order; a linear
    activation named ``'path_cls_cls'`` only gives the output its name.
    The model is compiled with binary cross-entropy and Adam.

    Returns
    -------
    dict
        The sub-models plus the combined model under the keys
        ``'model_classifier'``, ``'model_resnet'``, ``'model_cnvt'``
        and ``'model'``.
    """
    flat_input = Input(shape=(img_dim,), name='input')

    tensor = flat_input
    for stage in (model_cnvt, model_resnet, model_classifier):
        tensor = stage(tensor)
    output = Activation('linear', name='path_cls_cls')(tensor)

    model = Model(flat_input, output, name='model')
    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['categorical_accuracy', 'binary_accuracy'],
    )

    bundle = {}
    bundle['model_classifier'] = model_classifier
    bundle['model_resnet'] = model_resnet
    bundle['model_cnvt'] = model_cnvt
    bundle['model'] = model
    return bundle

# Assemble the end-to-end model from the three restored sub-models and print its layer summary.
models = make_model(img_dim, model_cnvt, model_resnet, model_classifier)
models['model'].summary()

In [None]:
'''
Thanks Iafoss.
pretrained ResNet34 with RGBY
https://www.kaggle.com/iafoss/pretrained-resnet34-with-rgby-0-460-public-lb
'''

def focal_loss(y_true, y_pred):
    """Focal-weighted binary cross-entropy, summed over axis 1 and averaged.

    A cross-entropy term is multiplied by
    ``exp(gamma * log_sigmoid(-y_pred * (2 * y_true - 1)))`` with gamma = 2.

    NOTE(review): the cross-entropy expression has the form normally applied
    to raw scores (logits), while the model in this notebook ends in a
    sigmoid — confirm that feeding probabilities here is intended.
    """
    gamma = 2
    scores = tf.cast(y_pred, tf.float32)
    neg_scores = -scores
    max_val = K.clip(neg_scores, 0, 1)
    log_term = K.log(K.exp(-max_val) + K.exp(neg_scores - max_val))
    bce_loss = scores - scores * y_true + max_val + log_term
    invprobs = tf.log_sigmoid(neg_scores * (y_true * 2.0 - 1.0))
    weighted = K.exp(invprobs * gamma) * bce_loss
    per_sample = K.sum(weighted, axis=1)
    return K.mean(per_sample)


# Cut-off used by the f1 metric below to binarise predictions.
THRESHOLD = 0.5

# credits: https://www.kaggle.com/guglielmocamporese/macro-f1-score-keras

# Small backend constant added to the denominators in f1 / f1_loss to avoid division by zero.
K_epsilon = K.epsilon()
def _mean_f1(y_true, y_pred):
    """Mean over columns of the F1 score computed from sums along axis 0.

    Shared by ``f1`` (binarised predictions) and ``f1_loss`` (raw
    predictions). Assumes axis 0 is the batch axis, so each column is one
    class — TODO confirm against the model output shape.
    """
    tp = K.sum(K.cast(y_true*y_pred, 'float'), axis=0)
    fp = K.sum(K.cast((1-y_true)*y_pred, 'float'), axis=0)
    fn = K.sum(K.cast(y_true*(1-y_pred), 'float'), axis=0)

    p = tp / (tp + fp + K_epsilon)
    r = tp / (tp + fn + K_epsilon)

    f1_per_column = 2*p*r / (p+r+K_epsilon)
    # Replace any NaN entries by 0 before averaging.
    f1_per_column = tf.where(tf.is_nan(f1_per_column), tf.zeros_like(f1_per_column), f1_per_column)
    return K.mean(f1_per_column)

def f1(y_true, y_pred):
    """Metric: mean F1 with predictions binarised at ``THRESHOLD``."""
    #y_pred = K.round(y_pred)
    y_pred = K.cast(K.greater(K.clip(y_pred, 0, 1), THRESHOLD), K.floatx())
    return _mean_f1(y_true, y_pred)

def f1_loss(y_true, y_pred):
    """Loss: ``1 - mean F1`` computed on the un-thresholded predictions.

    Skipping the threshold keeps the expression differentiable with respect
    to ``y_pred``.
    """
    return 1-_mean_f1(y_true, y_pred)

# Start training

In [None]:
# Re-compile the combined model for training: optimise the soft F1 loss and
# additionally report accuracies, thresholded F1 and focal loss as metrics.
# This replaces the binary cross-entropy compile done inside make_model.
models['model'].compile(loss=f1_loss,
                        optimizer='adam',
                        metrics=['categorical_accuracy', 'binary_accuracy', f1, focal_loss])

In [None]:
def lr_schedule(epoch):
    """Step schedule: 1e-3 for epochs 0..8, 1e-4 from epoch 9 on.

    Prints the chosen rate and returns it.
    """
    lr = 1e-4 if epoch > 8 else 1e-3
    print('Learning rate: ', lr)
    return lr

# Apply lr_schedule through Keras' LearningRateScheduler callback.
lr_scheduler = LearningRateScheduler(lr_schedule)

# NOTE(review): LearningRateScheduler sets the learning rate from lr_schedule
# at the start of each epoch, which would overwrite any reduction made by
# ReduceLROnPlateau at the end of the previous one — confirm whether both
# callbacks are meant to be active together.
callbacks = [lr_scheduler,
            ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1, mode='min')]

In [None]:
# Training batches: random rotation/flip augmentation on, 32 samples per batch.
seq = Seq(train_labels, extend=False, aug=True, batch_size=32)
# Number of batches in one pass over train_labels.
print(len(seq))

In [None]:
# Validation batches are served WITHOUT random augmentation so that val_loss,
# which ReduceLROnPlateau monitors, is measured on the same images every time
# instead of on randomly rotated/flipped variants.
val_seq = Seq(val_labels, extend=False, aug=False, batch_size=32)
# Number of batches in one pass over val_labels.
print(len(val_seq))

In [None]:
# Train for 25 epochs; one epoch is one full pass over `seq`.
# NOTE(review): validation_steps=10 evaluates only 10 batches of val_seq per
# epoch, and because Seq keeps its position between calls each epoch sees a
# different slice of the validation set — confirm this is intended rather
# than validation_steps=len(val_seq).
hst = models['model'].fit_generator(seq, epochs=25,
                              steps_per_epoch=len(seq),validation_data=val_seq,validation_steps=10,
                              callbacks=callbacks)

# Show training curves

In [None]:
def gen_graph(history, title):
    """Plot training vs. validation curves for the loss and every tracked metric.

    One figure is shown per entry, in this order: loss, categorical
    accuracy, binary accuracy, F1 and focal loss.

    Parameters
    ----------
    history : object
        Must expose a ``history`` dict holding, for each plotted key, both
        ``key`` and ``'val_' + key`` (as returned by ``fit_generator``).
    title : str
        Text appended to each figure title.

    Raises
    ------
    KeyError
        If one of the expected keys is missing from ``history.history``.
    """
    # (history key, title prefix) for each figure, in display order.
    panels = [
        ('loss', 'Loss '),
        ('categorical_accuracy', 'Categorical Accuracy '),
        ('binary_accuracy', 'Binary Accuracy '),
        ('f1', 'F1 '),
        ('focal_loss', 'Focal Loss '),
    ]
    for key, label in panels:
        plt.plot(history.history[key])
        plt.plot(history.history['val_' + key])
        plt.title(label + title)
        plt.ylabel('Score')
        plt.xlabel('Epoch')
        plt.legend(['train', 'validation'], loc='upper left')
        plt.show()

In [None]:
# Plot the curves recorded during the training run above.
gen_graph(hst, "ResNet")

In [None]:
def calc_threshold(pred, default_threshold=0.5):
    """Pick, per class, the cut-off that maximises F1 against ``y_cat_train_dic``.

    For each of the 28 classes the candidates 0.00, 0.01, ..., 0.99 are
    tried; the first candidate reaching the best F1 wins and is stored
    shifted by +0.005.

    Parameters
    ----------
    pred : numpy.ndarray
        Predicted scores, one row per sample and one column per class; the
        rows must line up with the vectors in the module-level
        ``y_cat_train_dic``.
    default_threshold : float
        Value stored for a class when no candidate achieves an F1 above 0.
        Without it such a class had no entry at all, which left the
        dictionary with fewer than 28 thresholds.

    Returns
    -------
    dict
        ``{class id: threshold}`` with an entry for every class 0..27.
    """
    candidates = [ii*0.01 for ii in range(100)]
    threshold_dic = {}
    for idx in tqdm(range(28)):
        y_true = y_cat_train_dic[idx]
        scores = pred[:,idx]
        best_f1 = 0
        threshold_dic[idx] = default_threshold
        for threshold0 in candidates:
            f1_val = f1_score(y_true, threshold0<scores)
            if best_f1 < f1_val:
                threshold_dic[idx] = threshold0+0.005
                best_f1 = f1_val
    return threshold_dic

In [None]:
def make_test(pred):
    """Turn predicted scores into two submission frames.

    Uses the module-level ``test_labels`` (as template) and
    ``threshold_dic`` (per-class cut-offs).

    Parameters
    ----------
    pred : numpy.ndarray
        Scores with one row per row of ``test_labels`` and one column per class.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``test_labels1``: ``Predicted`` holds only the arg-max class.
        ``test_labels2``: ``Predicted`` holds every class whose score
        exceeds its threshold plus the arg-max class (so no row is empty),
        as space-separated ids in ascending order.
    """
    n_classes = pred.shape[1]
    top_class = np.argmax(pred, axis=1)

    test_labels1 = test_labels.copy()
    test_labels1['Predicted'] = [str(ee) for ee in top_class]
    print(test_labels1.head())
    #test_labels1.to_csv(fn0, index=False)

    # Thresholds ordered by class id, built once instead of once per row.
    # A class missing from threshold_dic falls back to 0.5.
    threshold = np.array([threshold_dic.get(icat, 0.5) for icat in range(n_classes)])

    predicted = []
    for ii in range(test_labels1.shape[0]):
        above = np.flatnonzero(threshold < pred[ii,:]).tolist()
        # Sorted numerically so the output is deterministic (joining a set
        # of strings gave an arbitrary order).
        classes = sorted(set(above) | {int(top_class[ii])})
        predicted.append(' '.join(str(ee) for ee in classes))

    # Assign the whole column at once; element-wise chained assignment
    # (df['Predicted'][ii] = ...) may write to a temporary copy.
    test_labels2 = test_labels1.copy()
    test_labels2['Predicted'] = predicted
    print(test_labels2.head())
    #test_labels2.to_csv(fn, index=False)
    return test_labels1, test_labels2

In [None]:
# Predict on the training rows, unaugmented and in train_labels order, so
# the scores can be compared with y_cat_train_dic (built from the same frame).
# NOTE(review): the thresholds derived from `pred` are therefore fitted on
# the rows the model was trained on, not on held-out data.
seq_pred = Seq(train_labels, test=False, aug=False, batch_size=128)
len(seq_pred)
pred = models['model'].predict_generator(seq_pred, steps=len(seq_pred), verbose=1)

In [None]:
# Per-class decision thresholds chosen on the predictions above; displayed for inspection.
threshold_dic = calc_threshold(pred)
threshold_dic

In [None]:
# Predict on the test images (test=True makes get_img read from the test folder).
seq_test = Seq(test_labels, test=True, aug=False, batch_size=128)
seq_test
pred_test = models['model'].predict_generator(seq_test, steps=len(seq_test), verbose=1)


In [None]:
# make_test returns (arg-max-only frame, thresholded frame). Unpacking both
# into the same name silently discarded the first; keep it under its own
# name and bind the thresholded frame to test_labels, which is what gets
# written to disk below.
test_labels_argmax, test_labels = make_test(pred_test)
test_labels.head()

In [None]:
# Write the submission file (index column omitted).
test_labels.to_csv('InceptionResNetV25.csv', index=False)

# Save Model

In [None]:
'''save weights for later loading'''
# One file per sub-model, mirroring how the weights were loaded at the top of the notebook.
models['model_cnvt'].save_weights('model_cnvt_25.h5')
models['model_resnet'].save_weights('model_resnet_25.h5')
models['model_classifier'].save_weights('model_classifier_25.h5')

In [None]:
from IPython.display import FileLink, FileLinks
# Render links to the files in the working directory (submission CSV and saved weights).
FileLinks('.')