In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load in

# Global switch between the custom focal loss (True) and plain binary
# cross-entropy (False). It is read by create_model (linear vs. sigmoid output
# layer) and by the model.compile call in the training cell.
FOCAL_LOSS=True

import numpy as np # linear algebra
# Seed NumPy's global RNG; DataGenerator.create_train draws its batch indices
# with np.random.choice.
np.random.seed(42)
import random
# Seed the standard-library RNG as well.
# NOTE(review): no TensorFlow/Keras seed is set in the visible cells, so the
# training run itself is presumably still not fully reproducible - confirm.
random.seed(42)

import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
# Show which datasets are mounted under ../input.
print(os.listdir("../input"))

# Any results you write to the current directory are saved as output.

In [None]:
def FocalLoss(gamma=2):
    """Build a focal-loss function that operates on raw logits.

    Args:
        gamma: focusing exponent; converted to ``float`` once, up front.

    Returns:
        A callable ``compute_loss(target, logit)`` that returns a scalar: the
        per-element focal loss summed over axis 1 and then averaged over the
        remaining elements.
    """
    gamma = float(gamma)

    def compute_loss(target, logit):
        # Numerically stable binary cross-entropy on logits: shifting by
        # max(-logit, 0) keeps both exponentials below from overflowing.
        shift = tf.clip_by_value(-logit, 0, 1.e9)
        log_term = tf.log(tf.exp(-shift) + tf.exp(-logit - shift))
        cross_entropy = logit - logit*target + shift + log_term

        # log(1 - p_t): maps targets {0, 1} to signs {-1, +1} so that a single
        # log-sigmoid covers both the positive and the negative case.
        log_one_minus_pt = tf.log_sigmoid(-logit * (target *2.0 - 1.0))

        # Modulating factor (1 - p_t) ** gamma, evaluated in log space.
        focal = tf.exp(log_one_minus_pt * gamma) * cross_entropy

        per_sample = tf.reduce_sum(focal, axis=1)
        return tf.reduce_mean(per_sample)

    return compute_loss

# Original PyTorch implementation that FocalLoss above was ported from (kept for reference):
#class FocalLoss(nn.Module):
#    def __init__(self, gamma=2):
#        super().__init__()
#        self.gamma = gamma
#
#    def forward(self, logit, target):
#        target = target.float()
#        max_val = (-logit).clamp(min=0)
#        loss = logit - logit * target + max_val + \
#               ((-max_val).exp() + (-logit - max_val).exp()).log()
#
#        invprobs = F.logsigmoid(-logit * (target * 2.0 - 1.0))
#        loss = (invprobs * self.gamma).exp() * loss
#        if len(loss.size())==2:
#            loss = loss.sum(dim=1)
#        return loss.mean()

In [None]:
import os, sys
import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.applications import MobileNet
from keras.models import Sequential, Model, load_model
from keras.layers import Activation, Dense, Multiply, Input, Flatten, Dropout, Conv2D
#from keras import metrics
from keras.optimizers import Adam
from keras import backend as K
from itertools import chain
from collections import Counter
import cv2
import random
import tensorflow as tf
import warnings
warnings.filterwarnings("ignore")

# Number of samples per mini-batch; passed to both data generators and used to
# derive steps_per_epoch / validation_steps in the training cell.
batch_size = 64


# Directory prefix for the training images. conv_df2np joins it with each 'Id',
# and DataGenerator.load_image appends '_red.png' / '_green.png' / '_blue.png'.
path_to_train = '../input/human-protein-atlas-image-classification/train/'
# Full training table; the code below reads its 'Id' and 'Target' columns.
data = pd.read_csv('../input/human-protein-atlas-image-classification/train.csv')

def conv_df2np(df_):
    """Convert a label table into a NumPy array of per-image records.

    Args:
        df_: DataFrame with an 'Id' column and a 'Target' column holding
            space-separated integer labels.

    Returns:
        NumPy array with one dict per row: 'path' is the 'Id' joined onto the
        module-level ``path_to_train``, and 'labels' is an integer array of
        the parsed labels.
    """
    label_tokens = df_['Target'].str.split(' ')
    records = [
        {
            'path': os.path.join(path_to_train, image_id),
            'labels': np.array([int(token) for token in tokens]),
        }
        for image_id, tokens in zip(df_['Id'], label_tokens)
    ]
    return np.array(records)

# Path/label records for every row of train.csv.
# NOTE(review): train_df_info is not referenced again in the visible cells
# (training uses train_dataset_info / valid_dataset_info built from the split
# CSVs) - confirm whether a later cell needs it before removing.
train_df_info = conv_df2np(data)


class DataGenerator:
    """Endless random mini-batch generator for multi-label RGB image data.

    Images are assembled from three single-channel PNG files per sample
    (``<path>_red.png``, ``<path>_green.png``, ``<path>_blue.png``).
    """

    def __init__(self):
        pass

    def create_train(self, dataset_info, batch_size, shape, augument=True,
                     n_classes=28):
        """Yield ``(batch_images, batch_labels)`` tuples forever.

        Args:
            dataset_info: indexable collection of dicts with a 'path' entry
                (file prefix handed to ``load_image``) and a 'labels' entry
                (integer class indices).
            batch_size: number of samples per yielded batch.
            shape: ``(height, width, 3)`` of each image in the batch.
            augument: accepted for interface compatibility; no augmentation
                is applied by this generator.
            n_classes: width of the multi-hot label vector (defaults to the
                previously hard-coded 28).

        Yields:
            ``batch_images`` of shape ``(batch_size, *shape)`` and
            ``batch_labels`` of shape ``(batch_size, n_classes)`` with 1 at
            every label index of the sample and 0 elsewhere.
        """
        assert shape[2] == 3
        while True:
            # Indices are drawn with replacement, so a sample can repeat
            # within a batch and an "epoch" is not an exact pass over the data.
            random_indexes = np.random.choice(len(dataset_info), batch_size)
            batch_images = np.empty((batch_size, shape[0], shape[1], shape[2]))
            batch_labels = np.zeros((batch_size, n_classes))
            for i, idx in enumerate(random_indexes):
                sample = dataset_info[idx]
                batch_images[i] = self.load_image(sample['path'], shape)
                # Fancy indexing sets all of the sample's classes at once.
                batch_labels[i][sample['labels']] = 1
            yield batch_images, batch_labels

    def load_image(self, path, shape):
        """Load the red/green/blue channel files and return one float image.

        Args:
            path: file prefix; '_red.png', '_green.png' and '_blue.png' are
                appended to it.
            shape: ``(height, width, channels)``; the image is resized to
                ``shape[0] x shape[1]``.

        Returns:
            ``float64`` array of shape ``(shape[0], shape[1], 3)``.

        Raises:
            FileNotFoundError: if any channel file cannot be read.
        """
        channels = []
        for color in ('red', 'green', 'blue'):
            filename = path + '_' + color + '.png'
            channel = cv2.imread(filename, cv2.IMREAD_GRAYSCALE)
            # cv2.imread signals failure by returning None rather than raising;
            # fail here with the offending file name instead of deep in resize.
            if channel is None:
                raise FileNotFoundError(
                    'Could not read image channel: ' + filename)
            channels.append(channel)

        image1 = np.stack(channels, -1)
        # cv2.resize expects (width, height), i.e. (shape[1], shape[0]).
        image1 = cv2.resize(
            image1, (shape[1], shape[0]), interpolation=cv2.INTER_AREA)
        # np.float was only an alias of the builtin float and is gone in recent
        # NumPy releases; np.float64 is the same dtype.
        return image1.astype(np.float64)

# One generator object per split.
train_datagen, valid_datagen = DataGenerator(), DataGenerator()


def _parse_target(target_str):
    """Turn a space-separated label string into a list of ints."""
    return [int(token) for token in target_str.split(' ')]


# Label bookkeeping on the full training table (adds columns to `data`).
data['target_list'] = data['Target'].map(_parse_target)
all_labels = [
    label for labels in data['target_list'].values for label in labels
]
c_val = Counter(all_labels)
n_keys = c_val.keys()
max_idx = max(n_keys)

# Boolean membership vector covering label indices 0..max_idx.
data['target_vec'] = data['target_list'].map(
    lambda labels: [slot in labels for slot in range(max_idx + 1)])
# Integer (0/1) membership vector over a fixed 28 label slots.
lab_vec = data['target_list'].map(
    lambda labels: [1 if slot in labels else 0 for slot in range(28)])

# Pre-computed train/validation split.
train_df = pd.read_csv('../input/mobilenetflsplit/train_part.csv')
valid_df = pd.read_csv('../input/mobilenetflsplit/valid_part.csv')

train_dataset_info, valid_dataset_info = (
    conv_df2np(train_df), conv_df2np(valid_df))
print(train_dataset_info.shape, valid_dataset_info.shape)

# Both generators produce 224x224 RGB batches; the validation one passes
# augument=False.
_IMAGE_SHAPE = (224, 224, 3)
train_generator = train_datagen.create_train(
    train_dataset_info, batch_size, _IMAGE_SHAPE)
validation_generator = valid_datagen.create_train(
    valid_dataset_info, batch_size, _IMAGE_SHAPE, False)






In [None]:
def create_model(input_shape, n_out):
    """Build a MobileNet backbone followed by a single Dense head.

    Args:
        input_shape: ``(height, width, channels)`` of the input images. It is
            used for both the Input layer and the MobileNet backbone;
            previously the backbone was hard-coded to (224, 224, 3), so any
            other value produced a mismatched model.
        n_out: number of output units (one per label).

    Returns:
        A Keras ``Model``. When the module-level ``FOCAL_LOSS`` flag is true
        the head is linear (raw logits, as FocalLoss expects); otherwise it
        uses a sigmoid activation.
    """
    inp_mask = Input(shape=input_shape)
    # NOTE(review): `weights` is left unspecified, so keras.applications uses
    # its own default (documented as 'imagenet') - confirm that is intended.
    # NOTE(review): `dropout` is documented for the classification top; with
    # include_top=False it presumably has no effect - confirm.
    pretrain_model_mask = MobileNet(
        input_shape=input_shape,
        include_top=False,
#        weights='imagenet',
        dropout=0.5,
        pooling='avg')
    pretrain_model_mask.name = 'mobilenet'

    x = pretrain_model_mask(inp_mask)
    # The focal loss consumes logits, so the sigmoid must not be applied twice.
    head_activation = 'linear' if FOCAL_LOSS else 'sigmoid'
    out = Dense(n_out, activation=head_activation)(x)
    model = Model(inputs=inp_mask, outputs=[out])

    return model

# Start from a clean graph so re-running this cell does not pile up layers.
keras.backend.clear_session()

model = create_model(input_shape=(224, 224, 3), n_out=28)


def logit_binary_accuracy(y_true, y_pred):
    """Per-label binary accuracy for a model that outputs raw logits.

    A logit above 0 corresponds to a sigmoid probability above 0.5, so the
    prediction is thresholded at 0 instead of being rounded.
    """
    predicted = K.cast(K.greater(y_pred, 0.0), K.floatx())
    return K.mean(K.equal(K.cast(y_true, K.floatx()), predicted), axis=-1)


# Keras labels a function metric with its __name__; keeping 'acc' preserves the
# 'acc' / 'val_acc' keys in history.history.
logit_binary_accuracy.__name__ = 'acc'

if FOCAL_LOSS:
    # With a custom loss and a 28-wide output, the 'acc' shortcut is resolved
    # to categorical (argmax) accuracy, which is not meaningful for
    # multi-label logits, so an explicit logit-aware metric is used instead.
    model.compile(
        loss=FocalLoss(gamma=2),
        optimizer='adam',
        metrics=[logit_binary_accuracy])
else:
    # With binary_crossentropy, 'acc' already resolves to binary accuracy on
    # the sigmoid outputs.
    model.compile(
        loss='binary_crossentropy', optimizer='adam', metrics=['acc'])
model.summary()

# train model
# The generators sample randomly and never terminate, so the step counts alone
# define how many batches make up one epoch / one validation pass.
history = model.fit_generator(
    train_generator,
    steps_per_epoch=len(train_df) // batch_size,
    validation_data=validation_generator,
    validation_steps=len(valid_df) // batch_size,
    epochs=1,
    verbose=1,
    use_multiprocessing=False,
    shuffle=False)
