In [None]:
!pip install -q efficientnet

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import cv2
import re
import math
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, Callback

import tensorflow_addons as tfa
from tensorflow_addons.optimizers import SWA, RectifiedAdam , Lookahead  
from tensorflow_addons.losses import SigmoidFocalCrossEntropy

from kaggle_datasets import KaggleDatasets
from sklearn.metrics import roc_curve, auc, roc_auc_score
from sklearn.model_selection import train_test_split, KFold

# Import EfficientNet
import efficientnet.tfkeras as efn
import matplotlib.pyplot as plt
%matplotlib inline

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Detect hardware, return appropriate distribution strategy
# Result of this cell: `tpu` (resolver or None) and `strategy`, which the
# settings cell uses to scale BATCH_SIZE and the training cell uses for
# `strategy.scope()`.
try:
    # TPU detection. No parameters necessary if TPU_NAME environment variable is
    # set: this is always the case on Kaggle.
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
except ValueError:
    # A ValueError from the resolver is treated as "no TPU available".
    tpu = None

if tpu:
    tf.config.experimental_connect_to_cluster(tpu)
    # Drop any Keras state and shut the TPU system down before initialising it
    # again, so re-running this cell starts from a clean TPU.
    tf.keras.backend.clear_session()
    tf.tpu.experimental.shutdown_tpu_system()
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
else:
    # Default distribution strategy in Tensorflow. Works on CPU and single GPU.
    strategy = tf.distribute.get_strategy()

print("REPLICAS: ", strategy.num_replicas_in_sync)

# Determine Main Settings 

In [None]:
# Backbone constructor called by build_model().
MODEL = efn.EfficientNetB3
# Image side length; also part of the dataset names / CSV paths used below.
DIM = 256
# Raw `target` value that is mapped to the positive class (label 1) in
# read_labeled_tfrecord(); every other target value becomes 0.
WHAT_PREDICT = 0

# Add external ISIC TFRecords to every fold.
USE_ISIC2019 = True
# Which external files to use: 'both' -> isic2019 + isic2018 files,
# 'odd' -> isic2019 files only, anything else -> isic2018 files only.
ODD_OR_EVEN = 'odd'

# Append the 'test_pl_*' TFRecords to the folds (presumably pseudo-labelled
# test data - TODO confirm), tiled PL_TEST_ADD_MULTI times.
PL_TEST_ADD = False
PL_TEST_ADD_MULTI = 2

# Append the upsampling TFRecords to the folds, tiled UPSAMPLING_MULTIPLAER times.
UPSAMPLE_FLG = False
UPSAMPLING_MULTIPLAER = 2

# When True the training cell skips model.fit() and loads saved fold weights.
LOAD_WEIGHTS = False

EPOCHS = 20
N_FOLDS = 3
# Per-fold weights used when accumulating test predictions (uniform average).
WGTS = [1/N_FOLDS]*N_FOLDS
# Global batch size: 32 per replica.
BATCH_SIZE = 32 * strategy.num_replicas_in_sync
# random_state of the KFold splitters.
SEED_ALL = 298149
# Intended filename of the submission CSV.
SUBMISSION_NAME = 'submission.csv'

# Multiplier applied to the start / max / min learning rates in get_lr_callback().
LR_SCALE = 1
# Number of augmented passes averaged per image at prediction time.
TTA = 20

# Coarse DropOut
# Forwarded to prepare_image() by get_dataset(); prepare_image() does not
# currently apply coarse dropout.
DROPRATE = 0
DROPCT = 8
DROPSIZE = 0.15

# Chess Dropout
# Parameters for chess_dropout(); prepare_image() does not currently call it.
CHESS_DROPOUT = 1
CHESS_BLOCK_SHAPE = (8,15)
CHESS_PROB_EVEN = 0.6
CHESS_PROB_ODD = 0.6
CHESS_PROB_ROTATE = 0.5
CHESS_PROB_TO_CHANGE_PROBS = 0.5

#Sprinkles
# 'sprinkles_prob' of 0 disables the augmentation in prepare_image().
SPRINKLES_CFG = {'num_holes': 180, 'side_length': 19,'sprinkles_mode':'normal','sprinkles_prob': 0}

# Hair Augmentations
# HAIR_NUM_MIN = 0
# HAIR_NUM_MAX = 20
# HAIR_AUG_PROB = 1.0

# Lets tf.data choose parallelism / prefetch buffer sizes.
AUTO = tf.data.experimental.AUTOTUNE

# Download TFRecords Train/Isic2019/Test 

In [None]:
# Resolve the GCS locations of the Kaggle datasets that hold the TFRecords and
# build sorted file lists (sorted so that fold indices are reproducible).
GCS_DS_PATH = KaggleDatasets().get_gcs_path(f'cat-melanoma-{DIM}x{DIM}')
GCS_DS_PATH_TEST = KaggleDatasets().get_gcs_path(f'melanoma-hair-{DIM}x{DIM}')

files_train = np.sort(np.array(tf.io.gfile.glob(GCS_DS_PATH + '/train_cat*.tfrec')))
files_test  = np.sort(np.array(tf.io.gfile.glob(GCS_DS_PATH_TEST + '/test_pl_*.tfrec')))

if PL_TEST_ADD:
    GCS_DS_PATH_PL = KaggleDatasets().get_gcs_path(f'melanoma-hair-{DIM}x{DIM}')
    files_train_pl = np.sort(np.array(tf.io.gfile.glob(GCS_DS_PATH_PL + '/test_pl_*.tfrec')))

if USE_ISIC2019:
    files_train_isic_2019 = np.sort(np.array(tf.io.gfile.glob(GCS_DS_PATH + '/isic2019_cat*.tfrec')))
    files_train_isic_2018 = np.sort(np.array(tf.io.gfile.glob(GCS_DS_PATH + '/isic2018_cat*.tfrec')))
    # `files_train_isic_2019` is the list the training loop actually consumes:
    #   'both' -> ISIC2019 + ISIC2018 files
    #   'odd'  -> ISIC2019 files only (list is already correct, nothing to do)
    #   else   -> ISIC2018 (and ISIC2017) files only
    if ODD_OR_EVEN == 'both':
        files_train_isic_2019 = np.concatenate([files_train_isic_2019,files_train_isic_2018])
    elif ODD_OR_EVEN != 'odd':
        files_train_isic_2019 = files_train_isic_2018

if UPSAMPLE_FLG:
    GCS_DS_PATH_UPSAMPLING = KaggleDatasets().get_gcs_path(f'malignant-v2-{DIM}x{DIM}')
    # List the bucket once; the slices below partition this single sorted list.
    files_upsampling_all = np.sort(np.array(tf.io.gfile.glob(GCS_DS_PATH_UPSAMPLING + '/train*.tfrec')))
    files_upsampling_train = files_upsampling_all[:15]
    files_upsampling_online = files_upsampling_all[15:30]
    if USE_ISIC2019:
        files_upsampling_external = files_upsampling_all[30:]
        if ODD_OR_EVEN == 'both':
            files_upsampling_isic2019 = files_upsampling_external
        # Was compared against 'add', a typo: with ODD_OR_EVEN == 'odd' the
        # code silently fell through to the even-indexed files below.
        elif ODD_OR_EVEN == 'odd':
            files_upsampling_isic2019 = files_upsampling_external[1::2]
        else:
            files_upsampling_isic2019 = files_upsampling_external[::2]
    
    

In [None]:
# Metadata tables for the competition images and (optionally) the external
# ISIC images. `train_df_from_image` is used later to tell competition images
# apart from external ones via its `patient_id` column.
train_df_from_image = pd.read_csv(f'../input/melanoma-{DIM}x{DIM}/train.csv')
if USE_ISIC2019:
    train_df_isic_2019 = pd.read_csv(f'../input/isic2019-{DIM}x{DIM}/train.csv')

# Define Input Pipeline for Images 

In [None]:

@tf.function
def chess_dropout(img, shape_img, shape_block, 
                  prob_dropout ,prob_rotate=0.5, 
                  prob_even_block = 0.5, prob_odd_block = 0.5,
                  prob_to_change_probs = 0.5
                 ):
    """Checkerboard-style block dropout for a single 3-channel image.

    The image is cut into a grid of rectangular blocks and each block is
    independently replaced by zeros. The zeroing probability of a block
    depends on the parity of its grid position (i + j), so "even" and "odd"
    cells of the checkerboard can be dropped at different rates. Partial
    blocks left over at the end of each axis are handled separately.

    Args:
        img: image tensor, sliced here as img[axis0, axis1, :]; blocks are
            built with 3 channels.
        shape_img: (size along axis 0, size along axis 1) of `img`; used for
            the grid arithmetic and the final reshape.
        shape_block: (size along axis 0, size along axis 1) of one block.
        prob_dropout: probability that the augmentation is applied at all;
            otherwise `img` is returned unchanged.
        prob_rotate: probability of swapping the two block dimensions.
        prob_even_block: zeroing probability for cells with even (i + j).
        prob_odd_block: zeroing probability for cells with odd (i + j).
        prob_to_change_probs: probability of swapping the even / odd
            probabilities for this call.

    Returns:
        Tensor reshaped to [shape_img[0], shape_img[1], 3].
    """
    # Skip the augmentation with probability (1 - prob_dropout).
    if tf.cast( tf.random.uniform([],0,1)>=prob_dropout, tf.int32)==1:
        return img
    
    # Optionally transpose the block shape.
    rotate_flag = tf.cast( tf.random.uniform([],0,1)<prob_rotate, tf.int32)
    if rotate_flag==1:
        zero_block = tf.zeros(shape=(shape_block[1],shape_block[0],3))
    else:
        zero_block = tf.zeros(shape=(shape_block[0],shape_block[1],3))
    shape_zeros = zero_block.shape
#     shape_img = img.shape
    
    # Optionally swap which parity gets which drop probability.
    P_CHANGE_FLG = tf.cast( tf.random.uniform([],0,1)<=prob_to_change_probs, tf.int32)
    if P_CHANGE_FLG==1:
        PROB_EVEN = prob_odd_block
        PROB_ODD = prob_even_block
    else:
        PROB_EVEN = prob_even_block
        PROB_ODD = prob_odd_block
    
    def random_block(i,j):
        # True means "zero this cell". Only the parity of (i + j) matters, so
        # the argument order is irrelevant.
        if tf.math.floormod((i+j),2) == 0:
            P0  =  tf.math.less(tf.random.uniform([],0,1),PROB_EVEN)
            return P0
        else:
            P1  =  tf.math.less(tf.random.uniform([],0,1),PROB_ODD)
            return P1
    
#     Number of whole blocks along axis 1 (per row of blocks), plus the leftover width
    num_in_row_block = tf.math.floordiv(shape_img[1], shape_zeros[1])
    residual_row = tf.math.floormod(shape_img[1], shape_zeros[1])
# Number of whole blocks along axis 0 (per column of blocks), plus the leftover height
    num_in_col_block = tf.math.floordiv(shape_img[0], shape_zeros[0])
    residual_col = tf.math.floormod(shape_img[0], shape_zeros[0])

    
    # NOTE(review): each placeholder below is overwritten by the very next
    # line, so only the tf.cast(...) initial values are ever used.
    img_row = tf.compat.v1.placeholder(tf.float32, name=  'img_row')
    img_row = tf.cast(zero_block, tf.float32) 
    img_col = tf.compat.v1.placeholder(tf.float32, name = 'img_col')
    img_col = tf.cast(zero_block, tf.float32) 
    img_rest_row = tf.compat.v1.placeholder(tf.float32, name = 'img_rest_row')
    img_rest_row = tf.cast(zero_block, tf.float32) 
    
    # Build the output one band of blocks at a time: img_row accumulates the
    # blocks of band j along axis 1, img_col stacks finished bands along axis 0.
    for j in range(num_in_col_block):
        # The accumulators grow inside the loop, so their spatial dimensions
        # are declared as unknown.
        tf.autograph.experimental.set_loop_options(
        shape_invariants=[(img_row, tf.TensorShape([None,None,3])),
                         (img_col, tf.TensorShape([None,None,3])),
                          (img_rest_row, tf.TensorShape([None,None,3])),
                         ])
        for i in range(num_in_row_block):
            if i==0:
                img_row = (tf.cast(zero_block, tf.float32) 
                                       if random_block(i,j) 
                                       else img[shape_zeros[0]*j:shape_zeros[0]*(j+1) ,
                                                          shape_zeros[1]*i:shape_zeros[1]*(i+1) ,
                                                          :] 
                          )
            else:
                img_row = tf.concat([img_row, (tf.cast(zero_block, tf.float32) 
                                               if random_block(i,j) 
                                                  else img[shape_zeros[0]*j:shape_zeros[0]*(j+1) ,
                                                          shape_zeros[1]*i:shape_zeros[1]*(i+1) ,
                                                          :] 
                                              )], axis=1)
        
        # Append the partial block that covers the leftover columns of band j.
        img_row = tf.concat( [img_row, (tf.cast(zero_block[:,:residual_row,:], tf.float32 )
                                        if random_block(num_in_row_block,j) 
                                                 else img[shape_zeros[0]*j:shape_zeros[0]*(j+1),
                                                          num_in_row_block*shape_zeros[1]:,:]  
                                       )], 
                                                                 axis=1)
        if j==0:
            img_col = img_row
        else:
            img_col = tf.concat( [img_col, img_row], axis=0)
            
    # Last, partial band: the leftover rows below the final whole band.
    for i in range(num_in_row_block):
        tf.autograph.experimental.set_loop_options(
        shape_invariants=[
                          (img_rest_row, tf.TensorShape([None,None,3])),
                         ])
        if i==0:
            img_rest_row = (tf.cast(zero_block[:residual_col,:,:] , tf.float32)
                            if random_block(num_in_col_block,i) 
                                                 else img[shape_zeros[0]*num_in_col_block:,
                                                          shape_zeros[1]*i:shape_zeros[1]*(i+1) ,
                                                          :] 
                           )
        else:
            img_rest_row = tf.concat([img_rest_row, (tf.cast(zero_block[:residual_col,:,:], tf.float32 )
                                                     if random_block(num_in_col_block,i) 
                                                 else img[shape_zeros[0]*num_in_col_block:,
                                                          shape_zeros[1]*i:shape_zeros[1]*(i+1) ,
                                                          :] 
                                                    )], axis=1)
    # Corner cell: leftover rows x leftover columns.
    img_rest_row = tf.concat([ img_rest_row, (tf.cast(zero_block[:residual_col,:residual_row,:], tf.float32)
                                              if random_block(num_in_row_block,num_in_col_block) else
                        img[shape_zeros[0]*num_in_col_block:,
                            num_in_row_block*shape_zeros[1]:,:]
                                             )], axis=1)
    
    # Stack the whole bands on top of the partial band and restore a static shape.
    img = tf.concat([img_col,img_rest_row], axis=0)
    img = tf.reshape(img, [shape_img[0],shape_img[1],3])
    return img
    
    
    
def make_mask(num_holes,side_length,rows, cols, num_channels):
    """Build one boolean mask that marks every sprinkle (square hole).

    Args:
        num_holes: number of squares to place.
        side_length: nominal side of a square; half of it is taken on each
            side of a random centre.
        rows, cols: spatial size of the mask.
        num_channels: number of times the 2-D mask is repeated on the last axis.

    Returns:
        Boolean tensor of shape [rows, cols, num_channels]; True inside any hole.
    """
    # One column of row / column indices per hole.
    row_grid = tf.tile(tf.expand_dims(tf.range(rows), axis=-1), [1, num_holes])
    col_grid = tf.tile(tf.expand_dims(tf.range(cols), axis=-1), [1, num_holes])

    # Random hole centres.
    centre_rows = tf.random.uniform([num_holes], minval=0, maxval=rows-1,
                                    dtype=tf.int32)
    centre_cols = tf.random.uniform([num_holes], minval=0, maxval=cols-1,
                                    dtype=tf.int32)

    # Hole extents, clipped to the image.
    half_side = side_length // 2
    top = tf.clip_by_value(centre_rows - half_side, 0, rows)
    bottom = tf.clip_by_value(centre_rows + half_side, 0, rows)
    left = tf.clip_by_value(centre_cols - half_side, 0, cols)
    right = tf.clip_by_value(centre_cols + half_side, 0, cols)

    # Both bounds are exclusive.
    in_rows = (row_grid > top) & (row_grid < bottom)
    in_cols = (col_grid > left) & (col_grid < right)

    # [rows, 1, holes] & [cols, holes] -> [rows, cols, holes]; then merge the
    # holes into one layer and duplicate it over the channels.
    per_hole = tf.expand_dims(in_rows, axis=1) & in_cols
    any_hole = tf.reduce_any(per_hole, axis=-1)
    return tf.tile(tf.expand_dims(any_hole, axis=-1), [1, 1, num_channels])
    
def sprinkles(image, cfg = SPRINKLES_CFG): 
    """Randomly overwrite small squares of `image` ("sprinkles" augmentation).

    Args:
        image: single image tensor; the first two axes are treated as rows and
            columns and the last axis as channels.
        cfg: dict with the keys
            'num_holes'      - number of squares,
            'side_length'    - nominal side of each square,
            'sprinkles_mode' - 'normal' (zeros), 'salt_pepper' (half the
                               squares ones, half zeros) or 'gaussian'
                               (standard-normal noise),
            'sprinkles_prob' - probability of applying the augmentation.

    Returns:
        The augmented image, or `image` unchanged when the augmentation is
        skipped.

    Raises:
        ValueError: if 'sprinkles_mode' is not one of the supported modes.
    """
    num_holes = cfg['num_holes']
    side_length = cfg['side_length']
    mode = cfg['sprinkles_mode']
    PROBABILITY = cfg['sprinkles_prob']
    
    # Apply with probability PROBABILITY.
    RandProb = tf.cast( tf.random.uniform([],0,1) < PROBABILITY, tf.int32)
    if (RandProb == 0)|(num_holes == 0): 
        return image
    
    img_shape = tf.shape(image)
    # Compare strings by value: `is` only worked through CPython's interning
    # of literals and raises a SyntaxWarning on Python 3.8+.
    if mode == 'normal':
        rejected = tf.zeros_like(image)
    elif mode == 'salt_pepper':
        # Two masks are drawn below, so each gets half of the holes.
        num_holes = num_holes // 2
        rejected_high = tf.ones_like(image)
        rejected_low = tf.zeros_like(image)
    elif mode == 'gaussian':
        rejected = tf.random.normal(img_shape, dtype=tf.float32)
    else:
        raise ValueError(f'Unknown mode "{mode}" given.')
        
    rows = img_shape[0]
    cols = img_shape[1]
    num_channels = img_shape[-1]
    if mode == 'salt_pepper':
        mask1 = make_mask(num_holes,side_length,rows, cols, num_channels)
        mask2 = make_mask(num_holes,side_length,rows, cols, num_channels)
        filtered_image = tf.where(mask1, rejected_high, image)
        filtered_image = tf.where(mask2, rejected_low, filtered_image)
    else:
        mask = make_mask(num_holes,side_length,rows, cols, num_channels)
        filtered_image = tf.where(mask, rejected, image)
    return filtered_image    
    

In [None]:

def dropout(image, DIM=256, PROBABILITY = 0.75, CT = 8, SZ = 0.2):
    """Coarse dropout: blank out CT random squares of a single image.

    Args:
        image: one image of size [DIM, DIM, 3] (not a batch).
        DIM: image side length.
        PROBABILITY: probability of applying the augmentation.
        CT: number of squares to remove.
        SZ: side of each square as a fraction of DIM.

    Returns:
        Image of shape [DIM, DIM, 3] with the squares set to zero, or the
        input unchanged when the augmentation is skipped.
    """
    # Decide once per image whether to apply the dropout.
    P = tf.cast( tf.random.uniform([],0,1)<PROBABILITY, tf.int32)
    if (P==0)|(CT==0)|(SZ==0): return image

    for _ in range(CT):
        # Random centre of the square.
        centre_x = tf.cast( tf.random.uniform([],0,DIM),tf.int32)
        centre_y = tf.cast( tf.random.uniform([],0,DIM),tf.int32)

        # Square bounds, clipped to the image.
        side = tf.cast( SZ*DIM,tf.int32) * P
        top = tf.math.maximum(0,centre_y-side//2)
        bottom = tf.math.minimum(DIM,centre_y+side//2)
        left = tf.math.maximum(0,centre_x-side//2)
        right = tf.math.minimum(DIM,centre_x+side//2)

        # Rebuild the affected band as [left part | zeros | right part] ...
        band = tf.concat(
            [image[top:bottom,0:left,:],
             tf.zeros([bottom-top,right-left,3]),
             image[top:bottom,right:DIM,:]],
            axis=1)
        # ... and put it back between the untouched rows above and below.
        image = tf.concat([image[0:top,:,:],band,image[bottom:DIM,:,:]],axis=0)

    # Reshape so the TPU compiler knows the static shape of the output tensor.
    return tf.reshape(image,[DIM,DIM,3])

In [None]:
# Scales of the random affine augmentation sampled in transform().
# Rotation and shear are multiplied by a standard-normal sample and passed to
# get_mat(), which converts them from degrees to radians.
ROT_ = 180.0
SHR_ = 2.0
# Zoom factors are drawn as 1 + normal / {H,W}ZOOM_.
HZOOM_ = 8.0
WZOOM_ = 8.0
# Shifts are {H,W}SHIFT_ times a standard-normal sample.
HSHIFT_ = 8.0
WSHIFT_ = 8.0

def get_mat(rotation, shear, height_zoom, width_zoom, height_shift, width_shift):
    """Compose a 3x3 matrix that transforms (homogeneous) pixel indices.

    Args:
        rotation, shear: angles in degrees, given as float32 tensors of shape [1].
        height_zoom, width_zoom: zoom factors, tensors of shape [1].
        height_shift, width_shift: shifts, tensors of shape [1].

    Returns:
        3x3 float tensor equal to (rotation @ shear) @ (zoom @ shift).
    """
    one  = tf.constant([1],dtype='float32')
    zero = tf.constant([0],dtype='float32')

    def as_3x3(entries):
        # `entries` holds nine shape-[1] tensors in row-major order.
        return tf.reshape(tf.concat([entries],axis=0), [3,3])

    # Degrees -> radians.
    rotation = math.pi * rotation / 180.
    shear    = math.pi * shear    / 180.

    cos_rot = tf.math.cos(rotation)
    sin_rot = tf.math.sin(rotation)
    rotation_matrix = as_3x3([cos_rot,  sin_rot, zero,
                              -sin_rot, cos_rot, zero,
                              zero,     zero,    one])

    cos_shr = tf.math.cos(shear)
    sin_shr = tf.math.sin(shear)
    shear_matrix = as_3x3([one,  sin_shr, zero,
                           zero, cos_shr, zero,
                           zero, zero,    one])

    zoom_matrix = as_3x3([one/height_zoom, zero,           zero,
                          zero,            one/width_zoom, zero,
                          zero,            zero,           one])

    shift_matrix = as_3x3([one,  zero, height_shift,
                           zero, one,  width_shift,
                           zero, zero, one])

    rotate_then_shear = K.dot(rotation_matrix, shear_matrix)
    zoom_then_shift = K.dot(zoom_matrix, shift_matrix)
    return K.dot(rotate_then_shear, zoom_then_shift)


def transform(image, DIM=DIM):    
    """Apply a random rotation / shear / zoom / shift to one image.

    Every destination pixel index is mapped through a random affine matrix
    from get_mat() back to a source pixel, and the source values are gathered
    (integer indices, no interpolation).

    Args:
        image: one image of size [DIM, DIM, 3] (not a batch).
        DIM: image side length. The default is the value the global DIM had
            when this function was defined.

    Returns:
        Tensor of shape [DIM, DIM, 3].
    """
    # input image - is one image of size [dim,dim,3] not a batch of [b,dim,dim,3]
    # output - image randomly rotated, sheared, zoomed, and shifted
    XDIM = DIM%2 #fix for size 331
    
    # Random parameters: each is a shape-[1] standard-normal sample scaled by
    # the module-level constants.
    rot = ROT_ * tf.random.normal([1], dtype='float32')
    shr = SHR_ * tf.random.normal([1], dtype='float32') 
    h_zoom = 1.0 + tf.random.normal([1], dtype='float32') / HZOOM_
    w_zoom = 1.0 + tf.random.normal([1], dtype='float32') / WZOOM_
    h_shift = HSHIFT_ * tf.random.normal([1], dtype='float32') 
    w_shift = WSHIFT_ * tf.random.normal([1], dtype='float32') 

    # GET TRANSFORMATION MATRIX
    m = get_mat(rot,shr,h_zoom,w_zoom,h_shift,w_shift) 

    # LIST DESTINATION PIXEL INDICES
    # Coordinates are centred on the image. Note that `-DIM//2` parses as
    # `(-DIM)//2`, which differs from `-(DIM//2)` for odd DIM.
    x   = tf.repeat(tf.range(DIM//2, -DIM//2,-1), DIM)
    y   = tf.tile(tf.range(-DIM//2, DIM//2), [DIM])
    z   = tf.ones([DIM*DIM], dtype='int32')
    idx = tf.stack( [x,y,z] )
    
    # ROTATE DESTINATION PIXELS ONTO ORIGIN PIXELS
    idx2 = K.dot(m, tf.cast(idx, dtype='float32'))
    idx2 = K.cast(idx2, dtype='int32')
    # Clip so that the gather indices computed below stay inside the image.
    idx2 = K.clip(idx2, -DIM//2+XDIM+1, DIM//2)
    
    # FIND ORIGIN PIXEL VALUES           
    # Convert centred coordinates back to array indices (row, column).
    idx3 = tf.stack([DIM//2-idx2[0,], DIM//2-1+idx2[1,]])
    d    = tf.gather_nd(image, tf.transpose(idx3))
        
    return tf.reshape(d,[DIM, DIM,3])

In [None]:
def read_labeled_tfrecord(example):
    """Parse one serialized training example into (encoded image, binary label).

    The label is 1 when the record's `target` equals the global WHAT_PREDICT
    and 0 otherwise (int64).
    """
    # Only the features needed for training are parsed; the other metadata
    # fields (patient_id, sex, age_approx, anatom_site_general_challenge,
    # diagnosis) are deliberately left out of the spec.
    feature_spec = {
        'image': tf.io.FixedLenFeature([], tf.string),
        'image_name': tf.io.FixedLenFeature([], tf.string),
        'target': tf.io.FixedLenFeature([], tf.int64),
    }
    parsed = tf.io.parse_single_example(example, feature_spec)
    is_positive = parsed['target'] == WHAT_PREDICT
    return parsed['image'], tf.cast(is_positive, tf.int64)


def read_unlabeled_tfrecord(example, return_image_name):
    """Parse one serialized example without using its label.

    Returns:
        (encoded image, image name) when `return_image_name` is truthy,
        otherwise (encoded image, 0).
    """
    feature_spec = {
        'image': tf.io.FixedLenFeature([], tf.string),
        'image_name': tf.io.FixedLenFeature([], tf.string),
    }
    parsed = tf.io.parse_single_example(example, feature_spec)
    # 0 is a dummy second element so the dataset keeps the (image, x) structure.
    name_or_dummy = parsed['image_name'] if return_image_name else 0
    return parsed['image'], name_or_dummy

 
def prepare_image(img, augment=True, dim=256, droprate=0, dropct=0, dropsize=0):    
    """Decode a JPEG string into a float image and optionally augment it.

    Args:
        img: encoded JPEG bytes (scalar string tensor).
        augment: apply the random augmentations when True.
        dim: image side length; the result is reshaped to [dim, dim, 3].
        droprate, dropct, dropsize: coarse-dropout settings. They are accepted
            for interface compatibility but are not used: the coarse dropout,
            hair and chess-dropout augmentations are currently disabled.

    Returns:
        float32 tensor of shape [dim, dim, 3]; pixel values are scaled to
        [0, 1] before augmentation.
    """
    pixels = tf.image.decode_jpeg(img, channels=3)
    pixels = tf.cast(pixels, tf.float32) / 255.0

    if augment:
        # Geometric augmentation first, then (optional) sprinkles, then
        # flip and colour jitter.
        pixels = transform(pixels,DIM=dim)
        if SPRINKLES_CFG['sprinkles_prob'] >0:
            pixels = sprinkles(pixels, SPRINKLES_CFG)
        pixels = tf.image.random_flip_left_right(pixels)
        pixels = tf.image.random_saturation(pixels, 0.7, 1.3)
        pixels = tf.image.random_contrast(pixels, 0.8, 1.2)
        pixels = tf.image.random_brightness(pixels, 0.1)

    # Fix the static shape for downstream batching.
    return tf.reshape(pixels, [dim,dim, 3])

def count_data_items(filenames):
    """Sum the record counts encoded in TFRecord file names.

    Each name is expected to contain the number of records between a '-' and
    the following '.', e.g. ``train-00-2071.tfrec`` -> 2071.

    Args:
        filenames: iterable of file name strings.

    Returns:
        ``np.sum`` of the per-file counts.
    """
    count_pattern = re.compile(r"-([0-9]*)\.")
    counts = []
    for filename in filenames:
        counts.append(int(count_pattern.search(filename).group(1)))
    return np.sum(counts)

In [None]:
def get_dataset(files, augment = False, shuffle = False, repeat = False, 
                labeled=True, return_image_names=True, batch_size=BATCH_SIZE, dim=256):
    """Build the tf.data input pipeline for a list of TFRecord files.

    Args:
        files: TFRecord file paths.
        augment: forward to prepare_image() to enable random augmentation.
        shuffle: shuffle records (buffer of 8192) and allow non-deterministic
            ordering.
        repeat: False -> single pass; an int -> that many passes; any other
            truthy value (e.g. True) -> repeat indefinitely.
        labeled: parse records with read_labeled_tfrecord() when True,
            otherwise with read_unlabeled_tfrecord().
        return_image_names: for unlabeled data, yield the image name as the
            second element instead of a dummy 0.
        batch_size: batch size.
        dim: image side length.

    Returns:
        Batched, prefetched dataset of (image, label-or-name) pairs.
    """
    records = tf.data.TFRecordDataset(files, num_parallel_reads=AUTO).cache()

    if repeat:
        # Exact type check on purpose: `True` is an int subclass, and it must
        # mean "repeat forever", not "repeat once".
        records = records.repeat(repeat) if type(repeat)==int else records.repeat()

    if shuffle:
        records = records.shuffle(1024*8)
        options = tf.data.Options()
        options.experimental_deterministic = False
        records = records.with_options(options)

    if labeled:
        parsed = records.map(read_labeled_tfrecord, num_parallel_calls=AUTO)
    else:
        parsed = records.map(
            lambda example: read_unlabeled_tfrecord(example, return_image_names),
            num_parallel_calls=AUTO)

    def decode(img, imgname_or_label):
        # Decode / augment the image; pass the second element through untouched.
        image = prepare_image(img, augment=augment, dim=dim,
                              droprate=DROPRATE, dropct=DROPCT, dropsize=DROPSIZE)
        return image, imgname_or_label

    images = parsed.map(decode, num_parallel_calls=AUTO)
    return images.batch(batch_size).prefetch(AUTO)

# Define Model and LR Schedule 

In [None]:
def build_model(dim=DIM):
    """Create and compile the binary classifier.

    Architecture: MODEL backbone (noisy-student weights, no top) ->
    global average pooling -> Dropout(0.2) -> Dense(1, sigmoid).
    Compiled with Adam, label-smoothed binary cross-entropy (0.05) and an AUC
    metric named 'training_AUC'.

    The caller is responsible for invoking this inside `strategy.scope()`.

    Args:
        dim: input image side length.

    Returns:
        The compiled tf.keras model.
    """
    loss = tf.keras.losses.BinaryCrossentropy(label_smoothing=0.05)

    backbone = MODEL(
        input_shape=(dim, dim, 3),
        drop_connect_rate=0.4,
        weights='noisy-student',
        include_top=False,
    )
    head = [
        tf.keras.layers.GlobalAveragePooling2D(),
        Dropout(0.2),
        Dense(1, activation='sigmoid'),
    ]
    model = tf.keras.Sequential([backbone] + head)

    model.compile(
        optimizer='adam',
        loss=loss,
        metrics=tf.keras.metrics.AUC(name='training_AUC'),
    )
    return model

In [None]:
def get_lr_callback(batch_size=BATCH_SIZE):
    """Return a LearningRateScheduler with ramp-up, plateau and exponential decay.

    Schedule (per epoch, all rates scaled by LR_SCALE):
      * epochs 0..3: linear ramp from lr_start to lr_max,
      * next 2 epochs: hold lr_max,
      * afterwards: decay towards lr_min by a factor of 0.7 per epoch.

    Args:
        batch_size: global batch size; lr_max is proportional to it.
    """
    lr_start   = 0.000005*LR_SCALE
    lr_max     = 0.00000125 * batch_size*LR_SCALE
    lr_min     = 0.0000005*LR_SCALE
    lr_ramp_ep = 4
    lr_sus_ep  = 2
    lr_decay   = 0.7

    def lrfn(epoch):
        # Ramp-up phase.
        if epoch < lr_ramp_ep:
            return (lr_max - lr_start) / lr_ramp_ep * epoch + lr_start
        # Plateau phase.
        if epoch < lr_ramp_ep + lr_sus_ep:
            return lr_max
        # Exponential decay phase.
        return (lr_max - lr_min) * lr_decay**(epoch - lr_ramp_ep - lr_sus_ep) + lr_min

    return tf.keras.callbacks.LearningRateScheduler(lrfn, verbose=False)

# Train K-Fold (split over TFRecord files)

In [None]:
# K-fold cross-validation over TFRecord *files* (not individual images).
cv = KFold(n_splits = N_FOLDS, random_state=SEED_ALL, shuffle=True)
if USE_ISIC2019:
    cv_isic_2019 = KFold(n_splits = N_FOLDS, random_state=SEED_ALL, shuffle=True)
    # Materialise the external-data splits once so that fold k uses split k.
    # Calling next(cv_isic_2019.split(...)) inside the loop restarted the
    # generator every time and therefore always returned the first split.
    isic_splits = list(cv_isic_2019.split(files_train_isic_2019))

# Out-of-fold bookkeeping: one entry per fold in each list.
oof_pred = []; oof_tar = []; oof_val = []; oof_names = []; oof_folds = []
# Weighted sum of the per-fold test predictions.
preds = np.zeros((count_data_items(files_test),1))

for num_fold, (train_idx, valid_idx) in enumerate(cv.split(files_train)):
    files_temp_train = files_train[train_idx]
    files_temp_valid = files_train[valid_idx]

    # Add the PL (test_pl_*) files
    if PL_TEST_ADD:
        files_temp_train = np.concatenate([files_temp_train,
                                          np.tile(files_train_pl[train_idx], PL_TEST_ADD_MULTI)
                                          ])
        files_temp_valid = np.concatenate([files_temp_valid,
                                          np.tile(files_train_pl[valid_idx], PL_TEST_ADD_MULTI)
                                          ])

    # Add the upsampling files
    if UPSAMPLE_FLG:
        files_temp_train = np.concatenate([files_temp_train,
                                           np.tile(files_upsampling_train[train_idx],UPSAMPLING_MULTIPLAER),
                                           np.tile(files_upsampling_online[train_idx],UPSAMPLING_MULTIPLAER)
                                          ])
        files_temp_valid = np.concatenate([files_temp_valid,
                                           np.tile(files_upsampling_train[valid_idx],UPSAMPLING_MULTIPLAER),
                                           np.tile(files_upsampling_online[valid_idx],UPSAMPLING_MULTIPLAER)
                                          ])

    # Add the external data, with or without its upsampling files
    if USE_ISIC2019:
        isic_train_idx, isic_valid_idx = isic_splits[num_fold]
        files_isic_temp_train = files_train_isic_2019[isic_train_idx]
        files_isic_temp_valid = files_train_isic_2019[isic_valid_idx]
        if UPSAMPLE_FLG:
            files_isic_temp_train = np.concatenate([files_isic_temp_train, np.tile(files_upsampling_isic2019[isic_train_idx],UPSAMPLING_MULTIPLAER)])
            files_isic_temp_valid = np.concatenate([files_isic_temp_valid, np.tile(files_upsampling_isic2019[isic_valid_idx],UPSAMPLING_MULTIPLAER)])
        files_temp_train = np.concatenate([files_temp_train,files_isic_temp_train])
        files_temp_valid = np.concatenate([files_temp_valid,files_isic_temp_valid])

    # Train the model for this fold
    train_dataset = get_dataset(files_temp_train, repeat=True, shuffle=True, augment=True,
                                dim=DIM, batch_size=BATCH_SIZE, labeled=True, return_image_names=False)
    # Validation during training runs two augmented passes over the fold.
    valid_dataset = get_dataset(files_temp_valid, repeat=2, shuffle=False, augment=True,
                                dim=DIM, batch_size=BATCH_SIZE*4,labeled=True, return_image_names=False)
    # Keep only the weights of the epoch with the lowest validation loss.
    sv = tf.keras.callbacks.ModelCheckpoint(
        f'{DIM}_fold_{num_fold}.h5', monitor='val_loss', verbose=0, save_best_only=True,
        save_weights_only=True, mode='min', save_freq='epoch')

    K.clear_session()
    with strategy.scope():
        model = build_model(dim=DIM)

    # `history` stays None when pre-trained weights are loaded, so the
    # reporting code below must not assume that training happened.
    history = None
    if LOAD_WEIGHTS:
        model.load_weights(f'../input/weights-for-tuning-melanoma/{DIM}_fold_{num_fold}.h5')
        print(f'Load Weights For Fold {num_fold}')
    else:
        history = model.fit(
                                train_dataset,
                                epochs=EPOCHS,
                                callbacks=[sv, get_lr_callback(BATCH_SIZE)],
                                validation_data = valid_dataset,
                                steps_per_epoch = count_data_items(files_temp_train)//BATCH_SIZE
                               )

        print('Loading best model...')
        model.load_weights(f'{DIM}_fold_{num_fold}.h5')

    # PREDICT OOF USING TTA
    # The unshuffled dataset repeats in a fixed order, so predictions come out
    # as TTA consecutive passes; reshape(order='F') puts one pass per column.
    print('Predicting OOF with TTA...')
    valid_dataset = get_dataset(files_temp_valid, repeat=True, shuffle=False, augment=True,
                                dim=DIM, batch_size=BATCH_SIZE*4, labeled=False,return_image_names=False)
    ct_valid = count_data_items(files_temp_valid)
    STEPS = int(round(TTA * ct_valid/BATCH_SIZE/4)+1)
    pred = model.predict(valid_dataset,steps=STEPS)[:TTA*ct_valid,]
    oof_pred.append( np.mean(pred.reshape((ct_valid,TTA),order='F'),axis=1) )

    # GET OOF TARGETS AND NAMES
    valid_dataset = get_dataset(files_temp_valid, repeat=False, shuffle=False, augment=False,
                                dim=DIM, batch_size=BATCH_SIZE*4, labeled=True, return_image_names=True)

    oof_tar.append( np.array([target.numpy() for img, target in iter(valid_dataset.unbatch())]) )
    oof_folds.append( np.ones_like(oof_tar[-1],dtype='int8')*num_fold )
    valid_dataset = get_dataset(files_temp_valid, repeat=False, shuffle=False, augment=False,
                                dim=DIM, batch_size=BATCH_SIZE*4, labeled=False, return_image_names=True)
    oof_names.append( np.array([img_name.numpy().decode("utf-8") for img, img_name in iter(valid_dataset.unbatch())]))

    # PREDICT TEST USING TTA
    print('Predicting Test with TTA...')
    test_dataset = get_dataset(files_test, repeat=True, shuffle=False, augment=True,
                               dim=DIM, batch_size=BATCH_SIZE*4, labeled=False, return_image_names=False)
    ct_test = count_data_items(files_test)
    STEPS = int(round(TTA * ct_test/BATCH_SIZE/4)+1)
    pred = model.predict(test_dataset,steps=STEPS)[:TTA*ct_test,]
    preds[:,0] += np.mean(pred.reshape((ct_test,TTA),order='F'),axis=1) * WGTS[num_fold]

    # REPORT RESULTS
    # Named `fold_auc` so the `auc` function imported from sklearn.metrics is
    # not overwritten inside the loop.
    fold_auc = roc_auc_score(oof_tar[-1],oof_pred[-1])
    if history is not None:
        best_val_auc = np.max( history.history['val_training_AUC'] )
    else:
        # No training run for this fold, hence no validation curve.
        best_val_auc = np.nan
    oof_val.append(best_val_auc)
    print('#### FOLD %i OOF AUC without TTA = %.3f, with TTA = %.3f'%(num_fold,oof_val[-1],fold_auc))
    del history
    
    

    

# Calculate OOF CV AUC 

In [None]:
# COMPUTE OVERALL OOF AUC
# Flatten the per-fold lists collected by the training loop into aligned
# arrays (one entry per validation image).
oof = np.concatenate(oof_pred); 
true = np.concatenate(oof_tar);
names = np.concatenate(oof_names); 
folds = np.concatenate(oof_folds)
auc = roc_auc_score(true,oof)
print('Overall OOF AUC with TTA = %.3f'%auc)

# SAVE OOF TO DISK
# Columns: image_name, target, pred (TTA-averaged), fold.
df_oof = pd.DataFrame(dict(
    image_name = names, target=true, pred = oof, fold=folds))
df_oof.to_csv('oof.csv',index=False)
df_oof.head()

In [None]:
if USE_ISIC2019:
    # Attach patient_id from the competition train.csv. Rows that find no
    # match there get a null patient_id and are dropped, which leaves only
    # the competition ("internal") images.
    df_oof_no_isic2019 = df_oof.set_index('image_name').join(train_df_from_image.set_index('image_name')['patient_id'])
    df_oof_no_isic2019 = df_oof_no_isic2019[df_oof_no_isic2019['patient_id'].notnull()].drop('patient_id', axis=1)
    print('Overall OOF AUC with TTA on internal = %.3f' % roc_auc_score(df_oof_no_isic2019['target'],df_oof_no_isic2019['pred']))
    # image_name is the index here, so it has to be written out explicitly;
    # index=False produced a file without any image names.
    df_oof_no_isic2019.to_csv('oof_no_isic.csv', index=True)

# Submit DataSet 

In [None]:
# Single unshuffled, un-augmented pass over the test files, used only to read
# the image names in the order the records are stored.
ds = get_dataset(files_test, augment=False, repeat=False, dim=DIM,
                 labeled=False, return_image_names=True)

# Names are stored as bytes in the records; decode them to str.
image_names = np.array([img_name.numpy().decode("utf-8") 
                        for img, img_name in iter(ds.unbatch())])

In [None]:
# Pair every test image name with its fold-averaged prediction and write the
# submission file, sorted by image name.
submission = pd.DataFrame(dict(image_name=image_names, target=preds[:,0]))
submission = submission.sort_values('image_name') 
# Use the filename configured in the settings cell instead of a second,
# hard-coded copy of it.
submission.to_csv(SUBMISSION_NAME, index=False)
submission.head()