In [None]:
# General libraries
import os
import shutil
import numpy as np
import pandas as pd
import random
import cv2
import matplotlib.pyplot as plt
from PIL import Image
from IPython.display import clear_output
from math import pi
from math import cos
from math import floor
from sklearn.model_selection import KFold
from kaggle_datasets import KaggleDatasets
import albumentations as alb
from sklearn.metrics import roc_auc_score

# Deep learning libraries
from tensorflow.keras import layers
import tensorflow.keras.backend as K
from keras import models
from keras import backend
from keras.callbacks import Callback
from keras.models import Model, Sequential
from keras.layers import Input, Dense, Flatten, Dropout, BatchNormalization
from keras.layers import Conv2D, SeparableConv2D, MaxPool2D, LeakyReLU, Activation
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
import tensorflow as tf, re, math
from keras import optimizers

!pip install -q efficientnet >> /dev/null
import efficientnet.tfkeras as efn

# CSV FILES OF THE 2020 COMPETITION, KEYED BY SPLIT
DATA_SETS_2020 = {
    split: '../input/siim-isic-melanoma-classification/%s.csv' % stem
    for split, stem in (('train', 'train'),
                        ('test', 'test'),
                        ('sub', 'sample_submission'))
}

# SEED FOR THE KFOLD SHUFFLE; CHANGE IT TO GET A DIFFERENT FOLD ASSIGNMENT
SEED = 13

# NUMBER OF FOLDS. USE 3, 5, OR 15
FOLDS = 5

# IMAGE SIZE TO LOAD FOR EACH FOLD
# CHOOSE 128, 256, 384, 512, 768
IMG_SIZES = [384 for _ in range(FOLDS)]

# PER-FOLD SWITCHES: ADD DATA FROM OLDER COMPETITIONS TO THE TRAINING FILES?
INC2019 = [True for _ in range(FOLDS)]
INC2018_2017 = [True for _ in range(FOLDS)]

# LOAD THE SEGMENTED VARIANT OF THE TFRECORD DATASET?
SEGMENTED = False

# PER-FOLD BATCH SIZE (PER REPLICA) AND EPOCH COUNT
BATCH_SIZES = [32 for _ in range(FOLDS)]
EPOCHS = [15 for _ in range(FOLDS)]

# PER-FOLD EFFICIENTNET VARIANT (INDEX INTO THE B0..B7 LIST)
EFF_NETS = [6 for _ in range(FOLDS)]

# LOWEST LEARNING RATE USED BY THE SCHEDULE
LR_MIN = 1e-6

# VERBOSITY PASSED TO fit() / predict()
VERBOSE = False

# WEIGHT OF EACH FOLD MODEL WHEN AVERAGING TEST PREDICTIONS
WGTS = [1 / FOLDS for _ in range(FOLDS)]

# AFFINE AUGMENTATION PARAMETERS
# Rotation, shear and shifts multiply a standard-normal draw;
# zooms are 1 + draw / value, so a larger value means a weaker zoom.
ROT_ = 180.0
SHR_ = 8.0
HZOOM_ = 14.0
WZOOM_ = 14.0
HSHIFT_ = 14.0
WSHIFT_ = 14.0

# HAIR AUGMENTATION: ON/OFF, PROBABILITY, MAX NUMBER OF HAIRS PER IMAGE
ADD_HAIR_PROB = 0.5
ADD_HAIR = True
N_HAIRS = 10

# CUTOUT (COARSE DROPOUT): ON/OFF AND PROBABILITY
CUTOUT_PROB = 0.25
CUTOUT = True

# PROBABILITIES OF THE SOLARIZE-ADD AND EQUALIZE OPERATIONS
SOLARIZE_PROB = 0.25
EQUALIZE_PROB = 0.25

# TEST TIME AUGMENTATION STEPS
TTA = 11

# TPU CONFIGURATION
# DEVICE is the preferred accelerator; it is downgraded to "GPU" below when
# no TPU can be resolved or when TPU initialization fails.
DEVICE = 'TPU'
tpu = None

if DEVICE == "TPU":
    print("connecting to TPU...")
    try:
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
        print('Running on TPU ', tpu.master())
    except ValueError:
        print("Could not connect to TPU")
        tpu = None

    if tpu:
        try:
            print("initializing  TPU ...")
            tf.config.experimental_connect_to_cluster(tpu)
            tf.tpu.experimental.initialize_tpu_system(tpu)
            strategy = tf.distribute.experimental.TPUStrategy(tpu)
            print("TPU initialized")
        except Exception as err:
            # The previous `except _:` named an undefined variable, so a
            # failure here surfaced as a NameError. Report the real error and
            # fall back to the default strategy so `strategy` is always bound.
            print("failed to initialize TPU")
            print(err)
            tpu = None
            DEVICE = "GPU"
    else:
        DEVICE = "GPU"

if DEVICE != "TPU":
    print("Using default strategy for CPU and single GPU")
    strategy = tf.distribute.get_strategy()

if DEVICE == "GPU":
    print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

# AUTO lets tf.data pick parallelism; REPLICAS scales the global batch size.
AUTO     = tf.data.experimental.AUTOTUNE
REPLICAS = strategy.num_replicas_in_sync
print(f'REPLICAS: {REPLICAS}')

# TFRecords augmentation

In [None]:
def get_mat(rotation, shear, height_zoom, width_zoom, height_shift, width_shift):
    """Compose the 3x3 matrix used to map destination pixel indices to source indices.

    `rotation` and `shear` are given in degrees. Every argument is combined
    with shape-[1] float32 constants, so each is expected to be a shape-[1]
    float32 tensor. The result is (rotation . shear) . (zoom . shift).
    """
    def as_3x3(entries):
        # Nine shape-[1] tensors, listed row by row, become one [3, 3] matrix.
        return tf.reshape(tf.concat([entries], axis=0), [3, 3])

    unit = tf.constant([1], dtype='float32')
    nil  = tf.constant([0], dtype='float32')

    # DEGREES -> RADIANS
    rot_rad   = math.pi * rotation / 180.
    shear_rad = math.pi * shear    / 180.

    # ROTATION
    cos_r = tf.math.cos(rot_rad)
    sin_r = tf.math.sin(rot_rad)
    rot_m = as_3x3([cos_r,  sin_r, nil,
                    -sin_r, cos_r, nil,
                    nil,    nil,   unit])

    # SHEAR
    cos_s = tf.math.cos(shear_rad)
    sin_s = tf.math.sin(shear_rad)
    shear_m = as_3x3([unit, sin_s, nil,
                      nil,  cos_s, nil,
                      nil,  nil,   unit])

    # ZOOM (reciprocals, because the matrix maps destination onto source)
    zoom_m = as_3x3([unit/height_zoom, nil,             nil,
                     nil,              unit/width_zoom, nil,
                     nil,              nil,             unit])

    # SHIFT
    shift_m = as_3x3([unit, nil,  height_shift,
                      nil,  unit, width_shift,
                      nil,  nil,  unit])

    left  = K.dot(rot_m, shear_m)
    right = K.dot(zoom_m, shift_m)
    return K.dot(left, right)

def transform(image, DIM=256):
    """Apply one random rotation/shear/zoom/shift to a single image.

    Args:
        image: one image of size [DIM, DIM, 3] (not a batch of [b, DIM, DIM, 3]).
        DIM: side length of the image in pixels.

    Returns:
        A [DIM, DIM, 3] tensor whose pixels are gathered from `image` at the
        transformed coordinates. Coordinates that fall outside the image are
        clipped to the valid range, so border pixels get repeated.
    """
    # input image - is one image of size [dim,dim,3] not a batch of [b,dim,dim,3]
    # output - image randomly rotated, sheared, zoomed, and shifted
    XDIM = DIM%2 #fix for size 331

    # Each parameter is an independent standard-normal draw scaled by the
    # module-level augmentation constants.
    rot = ROT_ * tf.random.normal([1], dtype='float32')
    shr = SHR_ * tf.random.normal([1], dtype='float32')
    h_zoom = 1.0 + tf.random.normal([1], dtype='float32') / HZOOM_
    w_zoom = 1.0 + tf.random.normal([1], dtype='float32') / WZOOM_
    h_shift = HSHIFT_ * tf.random.normal([1], dtype='float32')
    w_shift = WSHIFT_ * tf.random.normal([1], dtype='float32')

    # GET TRANSFORMATION MATRIX
    m = get_mat(rot,shr,h_zoom,w_zoom,h_shift,w_shift)

    # LIST DESTINATION PIXEL INDICES
    # Coordinates are centred on the image; z = 1 makes them homogeneous so
    # the shift column of the matrix takes effect.
    x   = tf.repeat(tf.range(DIM//2, -DIM//2,-1), DIM)
    y   = tf.tile(tf.range(-DIM//2, DIM//2), [DIM])
    z   = tf.ones([DIM*DIM], dtype='int32')
    idx = tf.stack( [x,y,z] )

    # ROTATE DESTINATION PIXELS ONTO ORIGIN PIXELS
    idx2 = K.dot(m, tf.cast(idx, dtype='float32'))
    idx2 = K.cast(idx2, dtype='int32')
    # Clip so that the index arithmetic below stays inside [0, DIM-1].
    idx2 = K.clip(idx2, -DIM//2+XDIM+1, DIM//2)

    # FIND ORIGIN PIXEL VALUES
    # Convert centred coordinates back to array indices before gathering.
    idx3 = tf.stack([DIM//2-idx2[0,], DIM//2-1+idx2[1,]])
    d    = tf.gather_nd(image, tf.transpose(idx3))

    return tf.reshape(d,[DIM, DIM,3])

Coarse dropout (cutout) augmentation. Source: https://www.kaggle.com/cdeotte/tfrecord-experiments-upsample-and-coarse-dropout

In [None]:
def dropout(image, DIM=256, PROBABILITY = 0.75, CT = 4, SZ = 0.2):
    """Blank out random squares of an image (coarse dropout / cutout).

    Args:
        image: one image of size [DIM, DIM, 3] (not a batch).
        DIM: side length of the image in pixels.
        PROBABILITY: chance that the dropout is applied at all.
        CT: number of squares to remove.
        SZ: side of each square as a fraction of DIM.

    Returns:
        The image with up to CT squares set to zero, reshaped to [DIM, DIM, 3];
        the input image unchanged when the random draw fails or CT/SZ is 0.
    """
    # input - one image of size [dim,dim,3] not a batch of [b,dim,dim,3]
    # output - image with CT squares of side size SZ*DIM removed

    # DO DROPOUT WITH PROBABILITY DEFINED ABOVE
    P = tf.cast( tf.random.uniform([],0,1) < PROBABILITY, tf.int32)
    if (P == 0)|(CT == 0)|(SZ == 0): return image

    for k in range( CT ):
        # CHOOSE RANDOM LOCATION
        x = tf.cast( tf.random.uniform([],0,DIM),tf.int32)
        y = tf.cast( tf.random.uniform([],0,DIM),tf.int32)
        # COMPUTE SQUARE
        # WIDTH is multiplied by P, so a failed draw would also give a
        # zero-sized square; the bounds are clamped to the image edges.
        WIDTH = tf.cast( SZ*DIM,tf.int32) * P
        ya = tf.math.maximum(0,y-WIDTH//2)
        yb = tf.math.minimum(DIM,y+WIDTH//2)
        xa = tf.math.maximum(0,x-WIDTH//2)
        xb = tf.math.minimum(DIM,x+WIDTH//2)
        # DROPOUT IMAGE
        # Rebuild the image from slices: the band of rows ya:yb is the left
        # part, a block of zeros, and the right part; rows above and below
        # the band are kept as they are.
        one = image[ya:yb,0:xa,:]
        two = tf.zeros([yb-ya,xb-xa,3])
        three = image[ya:yb,xb:DIM,:]
        middle = tf.concat([one,two,three],axis=1)
        image = tf.concat([image[0:ya,:,:],middle,image[yb:DIM,:,:]],axis=0)

    # RESHAPE HACK SO TPU COMPILER KNOWS SHAPE OF OUTPUT TENSOR
    image = tf.reshape(image,[DIM,DIM,3])
    return image

AugMix-style colour operations (solarize-add, equalize, autocontrast). Source: https://www.kaggle.com/szacho/augmix-data-augmentation-on-tpu

In [None]:
def float_parameter(level, maxval):
    """Return `level * maxval / 10` cast to float32."""
    scaled = (level) * maxval / 10.
    return tf.cast(scaled, tf.float32)

def sample_level(n):
    """Draw one float32 value uniformly from [0.1, n), returned with shape [1]."""
    return tf.random.uniform([1], 0.1, n, tf.float32)

def solarize_add(image, level, probability=1.0):
    """Randomly shift the darker pixels of an image up or down.

    With chance `probability`, a threshold and an offset are sampled from
    `level` (threshold = sample/10, offset magnitude = sample/20, where the
    sample is uniform in [0.1, level)). The offset gets a random sign, is
    added to the whole image and clipped to [0, 1]; the shifted value
    replaces only pixels whose original value is below the threshold.
    Otherwise the image is returned untouched.
    """
    P = tf.cast(tf.random.uniform([], 0, 1) < probability, tf.int32)
    if P == 0:
        return image

    cutoff = float_parameter(sample_level(level), 1)
    magnitude = float_parameter(sample_level(level), 0.5)

    # Fair coin decides whether the offset brightens or darkens.
    coin = tf.random.uniform(shape=[], dtype=tf.float32)
    offset = tf.cond(coin > 0.5, lambda: magnitude, lambda: -magnitude)

    shifted = tf.clip_by_value(tf.cast(image, tf.float32) + offset, 0, 1)
    shifted = tf.cast(shifted, tf.float32)
    return tf.where(image < cutoff, shifted, image)

def equalize(image, probability=1.0):
    """Histogram-equalize each colour channel with the given probability.

    The image is multiplied by 255 and cast to uint8, each of the three
    channels is equalized independently through a lookup table, and the
    result is divided by 255 and clipped to [0, 1] as float32. When the
    random draw fails the input image is returned unchanged.
    """
    P = tf.cast( tf.random.uniform([],0,1) < probability, tf.int32)
    if P == 0:
        return image
    # Work on 8-bit values so the histogram has exactly 256 bins.
    image = tf.cast(tf.math.scalar_mul(255, image), tf.uint8)

    def scale_channel(im, c):
        """Equalize channel `c` of `im` and return it as uint8."""
        im = tf.cast(im[:, :, c], tf.int32)
        # Compute the histogram of the image channel.
        histo = tf.histogram_fixed_width(im, [0, 255], nbins=256)
        # For the purposes of computing the step, filter out the nonzeros.
        nonzero = tf.where(tf.not_equal(histo, 0))
        nonzero_histo = tf.reshape(tf.gather(histo, nonzero), [-1])
        # Pixel count excluding the last occupied bin, spread over 255 levels.
        step = (tf.reduce_sum(nonzero_histo) - nonzero_histo[-1]) // 255

        def build_lut(histo, step):
            """Build the 256-entry lookup table from the cumulative histogram."""
            # Compute the cumulative sum, shifting by step // 2
            # and then normalization by step.
            lut = (tf.cumsum(histo) + (step // 2)) // step
            # Shift lut, prepending with 0.
            lut = tf.concat([[0], lut[:-1]], 0)
            # Clip the counts to be in range.  This is done
            # in the C code for image.point.
            return tf.clip_by_value(lut, 0, 255)

        # If step is zero, return the original image.  Otherwise, build
        # lut from the full histogram and step and then index from it.
        result = tf.cond(tf.equal(step, 0),
                        lambda: im,
                        lambda: tf.gather(build_lut(histo, step), im))

        return tf.cast(result, tf.uint8)

    # Assumes RGB for now.  Scales each channel independently
    # and then stacks the result.
    s1 = scale_channel(image, 0)
    s2 = scale_channel(image, 1)
    s3 = scale_channel(image, 2)
    image = tf.stack([s1, s2, s3], 2)

    # Back to float32 in [0, 1].
    return tf.cast(tf.clip_by_value(tf.math.divide(image, 255), 0, 1), tf.float32)

def autocontrast(image, probability=1.0):
    """Stretch each colour channel to the full 0-255 range with the given probability.

    The image is multiplied by 255 and cast to uint8; each channel is rescaled
    so its minimum maps to 0 and its maximum to 255 (a constant channel is
    left as is). The result is divided by 255 and clipped to [0, 1] as
    float32. When the random draw fails the input is returned unchanged.
    """
    P = tf.cast(tf.random.uniform([], 0, 1) < probability, tf.int32)
    if P == 0:
        return image
    image = tf.cast(tf.math.scalar_mul(255, image), tf.uint8)

    def stretch(channel):
        # Min/max are computed over the whole channel.
        low = tf.cast(tf.reduce_min(channel), tf.float32)
        high = tf.cast(tf.reduce_max(channel), tf.float32)

        def rescaled():
            # Linear map sending `low` to 0 and `high` to 255.
            gain = 255.0 / (high - low)
            bias = -low * gain
            values = tf.cast(channel, tf.float32) * gain + bias
            values = tf.clip_by_value(values, 0.0, 255.0)
            return tf.cast(values, tf.uint8)

        # A flat channel has no range to stretch (and would divide by zero).
        return tf.cond(high > low, rescaled, lambda: channel)

    # Assumes RGB: the three channels are processed independently.
    red = stretch(image[:, :, 0])
    green = stretch(image[:, :, 1])
    blue = stretch(image[:, :, 2])
    image = tf.stack([red, green, blue], 2)

    normalized = tf.math.divide(image, 255)
    return tf.cast(tf.clip_by_value(normalized, 0, 1), tf.float32)

Hair augmentation (overlaying images of hairs on the lesion images)
* Source: https://www.kaggle.com/graf10a/siim-data-augmentation-in-tf-hair-batch-affine

In [None]:
# Locate the hair overlay PNGs in the 'melanoma-hairs' dataset bucket and keep
# their paths as a string tensor so a random one can be picked inside tf.data.
GCS_PATH = {'hairs': KaggleDatasets().get_gcs_path('melanoma-hairs')}
hair_images = tf.io.gfile.glob(GCS_PATH['hairs'] + '/*.png')
hair_images_tf = tf.convert_to_tensor(hair_images)

def hair_aug(image, dim=384, n_max=6, probability=1.0):
    """Overlay a random number of hair pictures on an image.

    Args:
        image: one float image tensor of shape [height, width, 3]. Hair pixels
            are added as value/255, so the image is presumably scaled to
            [0, 1] -- confirm against the caller.
        dim: side length of the input image. The hair pictures are sized for
            256-pixel images and are rescaled by dim/256.
        n_max: maximum number of hairs; the count is drawn uniformly from
            0..n_max.
        probability: chance that the augmentation is applied at all.

    Returns:
        A tensor of the same shape as `image` with the hairs pasted in, or the
        input itself when the random draw fails or zero hairs are drawn.
    """
    P = tf.cast( tf.random.uniform([],0,1) < probability, tf.int32)
    if P == 0:
        return image
    # Copy the input image, so it won't be changed
    img=tf.identity(image)
    # Randomly choose the number of hairs to augment (up to n_max)
    n_hairs = tf.random.uniform(shape=[], maxval=tf.constant(n_max)+1, dtype=tf.int32)

    # Axis 0 is the height (rows) and axis 1 the width (columns); these were
    # swapped before, which only went unnoticed because the images are square.
    im_height=tf.shape(img)[0]
    im_width=tf.shape(img)[1]

    if n_hairs == 0:
        return img

    # Keep the scale as a float: an integer cast truncated dim/256, so 384
    # gave 1 instead of 1.5 and any dim below 256 gave 0.
    scale = tf.cast(dim, tf.float32) / 256.0
    # A hair may be rotated by 90 degrees below, so it has to fit both ways.
    max_side = tf.minimum(im_height, im_width)

    for _ in tf.range(n_hairs):
        # Read a random hair image
        i=tf.random.uniform(shape=[], maxval=tf.shape(hair_images_tf)[0],
                            dtype=tf.int32)
        fname=hair_images_tf[i]

        bits = tf.io.read_file(fname)
        # NOTE(review): the files are globbed as *.png; TensorFlow documents
        # decode_jpeg as also accepting PNG input -- confirm for the TF
        # version in use.
        hair = tf.image.decode_jpeg(bits)

        # Rescale the hair image to the right size (256 -- original size),
        # never below one pixel and never larger than the image itself.
        hair_hw = tf.cast(tf.shape(hair)[:2], tf.float32)
        new_size = tf.cast(tf.round(hair_hw * scale), tf.int32)
        new_size = tf.clip_by_value(new_size, 1, max_side)
        hair = tf.image.resize(hair, new_size)

        # Random flips of the hair image
        hair = tf.image.random_flip_left_right(hair)
        hair = tf.image.random_flip_up_down(hair)
        # Random number of 90 degree rotations
        n_rot=tf.random.uniform(shape=[], maxval=4,
                                dtype=tf.int32)
        hair = tf.image.rot90(hair, k=n_rot)

        h_height=tf.shape(hair)[0]
        h_width=tf.shape(hair)[1]

        # Top-left corner of the region where the hair is pasted.
        roi_h0 = tf.random.uniform(shape=[], maxval=im_height - h_height + 1,
                                    dtype=tf.int32)
        roi_w0 = tf.random.uniform(shape=[], maxval=im_width - h_width + 1,
                                    dtype=tf.int32)

        roi = img[roi_h0:(roi_h0 + h_height), roi_w0:(roi_w0 + h_width)]

        # Convert the hair image to grayscale
        # (slice to remove the transparency channel)
        hair2gray = tf.image.rgb_to_grayscale(hair[:, :, :3])

        # Pixels brighter than 10 count as hair; the rest is background.
        mask=hair2gray>10

        img_bg = tf.multiply(roi, tf.cast(tf.image.grayscale_to_rgb(~mask),
                                          dtype=tf.float32))
        hair_fg = tf.multiply(tf.cast(hair[:, :, :3], dtype=tf.int32),
                              tf.cast(tf.image.grayscale_to_rgb(mask),
                                      dtype=tf.int32
                                      )
                             )

        # Background from the image plus hair pixels divided by 255.
        dst = tf.add(img_bg, tf.cast(hair_fg, dtype=tf.float32)/255)

        paddings = tf.stack([
            [roi_h0, im_height-(roi_h0 + h_height)],
            [roi_w0, im_width-(roi_w0 + h_width)],
            [0, 0]
        ])

        # Pad dst with zeros to make it the same shape as image.
        dst_padded=tf.pad(dst, paddings, "CONSTANT")
        # Create a boolean mask with zeros at the pixels of
        # the augmentation segment and ones everywhere else
        mask_img=tf.pad(tf.ones_like(dst), paddings, "CONSTANT")
        mask_img=~tf.cast(mask_img, dtype=tf.bool)
        # Make a hole in the original image at the location
        # of the augmentation segment
        img_hole=tf.multiply(img, tf.cast(mask_img, dtype=tf.float32))
        # Inserting the augmentation segment in place of the hole
        img=tf.add(img_hole, dst_padded)

    return img

# TFRecords handling

In [None]:
def read_labeled_tfrecord(example):
    """Parse one serialized labeled example and return (encoded image, target)."""
    string_feature = tf.io.FixedLenFeature([], tf.string)
    int_feature = tf.io.FixedLenFeature([], tf.int64)
    # Every listed feature is a required scalar, although only two are returned.
    schema = {
        'image'                        : string_feature,
        'image_name'                   : string_feature,
        'patient_id'                   : int_feature,
        'sex'                          : int_feature,
        'age_approx'                   : int_feature,
        'anatom_site_general_challenge': int_feature,
        'diagnosis'                    : int_feature,
        'target'                       : int_feature,
    }
    parsed = tf.io.parse_single_example(example, schema)
    return parsed['image'], parsed['target']

def read_unlabeled_tfrecord(example, return_image_name):
    """Parse one serialized example without using its label.

    Returns (encoded image, image name) when `return_image_name` is true,
    otherwise (encoded image, 0).
    """
    string_feature = tf.io.FixedLenFeature([], tf.string)
    schema = {
        'image'                        : string_feature,
        'image_name'                   : string_feature,
    }
    parsed = tf.io.parse_single_example(example, schema)
    if return_image_name:
        second = parsed['image_name']
    else:
        second = 0
    return parsed['image'], second
 
def prepare_image(img, augment=True, dim=256):
    """Decode a JPEG and optionally run the training augmentations.

    Args:
        img: encoded JPEG bytes of one image.
        augment: when true, apply hair overlay, flip, colour jitter, the
            affine transform, cutout, solarize-add, equalize and autocontrast.
        dim: side length of the image; the result is reshaped to [dim, dim, 3].

    Returns:
        A float32 tensor of shape [dim, dim, 3]; the decoded pixel values are
        divided by 255 before any augmentation.
    """
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.cast(img, tf.float32) / 255.0

    if augment:
        if ADD_HAIR:
            # Pass the real image size: without it hair_aug fell back to its
            # default of 384 and sized the hairs for the wrong resolution.
            img = hair_aug(img, dim=dim, n_max=N_HAIRS, probability=ADD_HAIR_PROB)
        img = tf.image.random_flip_left_right(img)
        img = tf.image.random_saturation(img, 0.7, 1.3)
        img = tf.image.random_contrast(img, 0.8, 1.2)
        img = tf.image.random_brightness(img, 0.2)
        img = transform(img, DIM=dim)
        if CUTOUT:
            img = dropout(img, DIM=dim, PROBABILITY = CUTOUT_PROB, CT = 1, SZ = 0.25)

        img = solarize_add(img, level=1, probability=SOLARIZE_PROB)
        img = equalize(img, probability=EQUALIZE_PROB)
        img = autocontrast(img, probability=0.5)

    # Fix the static shape so downstream batching knows the image size.
    img = tf.reshape(img, [dim,dim, 3])
    return img

def count_data_items(filenames):
    """Add up the record counts embedded in the file names.

    Each name is expected to contain '-<count>.' (for example
    'train00-2071.tfrec'); the first such match in every name is used.
    """
    count_pattern = re.compile(r"-([0-9]*)\.")
    counts = []
    for name in filenames:
        counts.append(int(count_pattern.search(name).group(1)))
    return np.sum(counts)

def get_dataset(files, augment = False, shuffle = False, repeat = False, labeled=True, return_image_names=True, batch_size=16, dim=256):
    """Build the tf.data pipeline for a set of TFRecord files.

    Records are cached, optionally repeated and shuffled, parsed (labeled or
    unlabeled), decoded/augmented by `prepare_image`, then batched with
    `batch_size * REPLICAS` items and prefetched. Each element is a pair
    (image batch, label-or-name batch).
    """
    ds = tf.data.TFRecordDataset(files, num_parallel_reads=AUTO).cache()

    if repeat:
        ds = ds.repeat()

    if shuffle:
        # Order does not matter once shuffled, so allow non-deterministic reads.
        options = tf.data.Options()
        options.experimental_deterministic = False
        ds = ds.shuffle(1024*8).with_options(options)

    if labeled:
        parse_fn = read_labeled_tfrecord
    else:
        parse_fn = lambda example: read_unlabeled_tfrecord(example, return_image_names)
    ds = ds.map(parse_fn, num_parallel_calls=AUTO)

    def decode(img, imgname_or_label):
        # Only the image is transformed; the second element passes through.
        return prepare_image(img, augment=augment, dim=dim), imgname_or_label

    ds = ds.map(decode, num_parallel_calls=AUTO)
    return ds.batch(batch_size * REPLICAS).prefetch(AUTO)

def prepare_k_datasets():
    """Create the train/validation/test datasets and file lists for every fold.

    Returns:
        (train_folds, valid_folds, test_folds, files_train, files_valid,
        files_test): six lists with one entry per fold. The first three hold
        tf.data datasets, the last three the TFRecord file paths behind them.
    """
    # One bucket per fold, chosen by that fold's image size.
    suffix = '' if SEGMENTED == False else '-segmented'
    gcs_paths = [None]*FOLDS
    for i,k in enumerate(IMG_SIZES):
        gcs_paths[i] = KaggleDatasets().get_gcs_path('isic2020and2019-%ix%i-tfrec'%(k,k) + suffix)

    kfold = KFold(n_splits=FOLDS, shuffle=True, random_state=SEED)
    train_folds = []; valid_folds = []; test_folds = []
    files_train = []; files_valid = []; files_test = []

    # The split runs over 15 shard indices -- assumes the 2020 training data
    # is stored in 15 TFRecord shards; TODO confirm against the dataset.
    for k, (idxT, idxV) in enumerate(kfold.split(np.arange(15))):
        # CREATE TRAIN AND VALIDATION SUBSETS
        files_train.append(tf.io.gfile.glob([gcs_paths[k] + '/2020_train%.2i*.tfrec'%x for x in idxT]))
        files_valid.append(tf.io.gfile.glob([gcs_paths[k] + '/2020_train%.2i*.tfrec'%x for x in idxV]))
        files_test.append(np.sort(np.array(tf.io.gfile.glob(gcs_paths[k] + '/2020_test*.tfrec'))))

        # Extra training data: odd shard numbers are used for INC2019 and
        # even ones for INC2018_2017, both under the '2019_train' prefix.
        if INC2019[k]:
            files_train[k] += tf.io.gfile.glob([gcs_paths[k] + '/2019_train%.2i*.tfrec'%x for x in idxT*2+1])

        if INC2018_2017[k]:
            files_train[k] += tf.io.gfile.glob([gcs_paths[k] + '/2019_train%.2i*.tfrec'%x for x in idxT*2])

        # Use THIS fold's files only. Passing the whole accumulated lists made
        # every fold after the first also read the files of earlier folds,
        # which put validation shards into the training stream.
        train_ds = get_dataset(files_train[k], augment=True, shuffle=True, repeat=True, dim=IMG_SIZES[k], batch_size = BATCH_SIZES[k])
        valid_ds = get_dataset(files_valid[k], augment=False, shuffle=False, repeat=False, dim=IMG_SIZES[k])
        test_ds  = get_dataset(files_test[k], labeled=False, return_image_names=False, augment=False, shuffle=False, repeat=False, dim=IMG_SIZES[k])

        train_folds.append(train_ds)
        valid_folds.append(valid_ds)
        test_folds.append(test_ds)

    return train_folds, valid_folds, test_folds, files_train, files_valid, files_test
    
def plot_batch(fold):
    """Show the images of the first batch of a dataset in a grid 8 columns wide.

    Args:
        fold: a batched dataset whose elements are (images, label-or-name).
            It is re-batched to BATCH_SIZES[0] images for the preview.
    """
    first_batch = next(iter(fold.unbatch().batch(BATCH_SIZES[0])))
    images = first_batch[0]
    batch_size = images.shape[0]

    # Round the row count up: flooring gave 0 rows for fewer than 8 images and
    # too few rows whenever the batch was not a multiple of 8.
    n_cols = 8
    n_rows = math.ceil(batch_size / n_cols)

    plt.figure(figsize=(30,10))
    for i in range(batch_size):
        plt.subplot(n_rows, n_cols, i + 1)
        plt.imshow(images[i])

In [None]:
# Build the datasets and file lists of all folds once, then preview one
# batch of the first fold's training data (built with augment=True).
train_folds, valid_folds, test_folds, files_train, files_valid, files_test = prepare_k_datasets()
print('TRAIN BATCH')
plot_batch(train_folds[0])

In [None]:
# Preview one batch of the first fold's validation data (built with augment=False).
print('VALID BATCH')
plot_batch(valid_folds[0])

In [None]:
# Preview one batch of the first fold's test data (built with augment=False).
print('TEST BATCH')
plot_batch(test_folds[0])

# Model define and training

In [None]:
# EfficientNet constructors indexed by variant: EFNS[n] is efn.EfficientNetB<n>.
# Nothing is built here; the model is created when an entry is called.
EFNS = [getattr(efn, 'EfficientNetB%d' % variant) for variant in range(8)]
        
def get_lr_callback_CA(batch_size=32, epochs=12, cycles=2, lr_max=1e-4, fold_number=0):
    """Return a cosine-annealing LearningRateScheduler and plot its schedule.

    The rate starts at `lr_max` at the beginning of each cycle and follows
    half a cosine down towards 0; a cycle lasts floor(epochs / cycles) epochs.
    `batch_size` is accepted but not used by the schedule. The plot covers
    max(epochs, EPOCHS[fold_number]) epochs.
    """
    def lrfn(epoch):
        cycle_len = floor(epochs/cycles)
        phase = (pi * (epoch % cycle_len)) / (cycle_len)
        return lr_max/2 * (cos(phase) + 1)

    scheduler = tf.keras.callbacks.LearningRateScheduler(lrfn, verbose=False)

    # Plot the schedule over the longer of the two epoch counts.
    shown_epochs = list(range(max(epochs, EPOCHS[fold_number])))
    rates = [lrfn(epoch) for epoch in shown_epochs]
    plt.plot(shown_epochs, rates)
    plt.title("Learning rate schedule: {:.3g} to {:.3g}".format(rates[0], rates[-1]))
    plt.xlabel('Epoch')
    plt.ylabel('Learning rate')
    plt.tight_layout()
    plt.show()

    return scheduler

def get_lr_callback(batch_size=8, epochs=12, fold_number=0):
    """Return a ramp-up / exponential-decay LearningRateScheduler and plot it.

    The rate climbs linearly from LR_MIN to a peak of
    0.00000125 * REPLICAS * batch_size over the first 5 epochs, then decays
    towards LR_MIN by a factor of 0.8 per epoch. `epochs` only affects the
    plot, which covers max(epochs, EPOCHS[fold_number]) epochs.
    """
    lr_start   = LR_MIN
    lr_min     = LR_MIN
    lr_max     = 0.00000125 * REPLICAS * batch_size
    lr_ramp_ep = 5      # epochs of linear warm-up
    lr_sus_ep  = 0      # epochs held at the peak
    lr_decay   = 0.8    # per-epoch decay factor after the peak

    def lrfn(epoch):
        if epoch < lr_ramp_ep:
            return (lr_max - lr_start) / lr_ramp_ep * epoch + lr_start
        if epoch < lr_ramp_ep + lr_sus_ep:
            return lr_max
        return (lr_max - lr_min) * lr_decay**(epoch - lr_ramp_ep - lr_sus_ep) + lr_min

    scheduler = tf.keras.callbacks.LearningRateScheduler(lrfn, verbose=False)

    # Plot the schedule over the longer of the two epoch counts.
    shown_epochs = list(range(max(epochs, EPOCHS[fold_number])))
    rates = [lrfn(epoch) for epoch in shown_epochs]
    plt.plot(shown_epochs, rates)
    plt.title("Learning rate schedule: {:.3g} to {:.3g} to {:.3g}".format(rates[0], max(rates), rates[-1]))
    plt.xlabel('Epoch')
    plt.ylabel('Learning rate')
    plt.tight_layout()
    plt.show()

    return scheduler

def build_model(img_dims, ef):
    """Build and compile the binary classifier on top of an EfficientNet backbone.

    Args:
        img_dims: side length of the square RGB input images.
        ef: index into EFNS selecting the EfficientNet variant.

    Returns:
        A compiled tf.keras model with a single sigmoid output, trained with
        label-smoothed binary cross-entropy and reporting AUC and accuracy.
    """
    inp = tf.keras.layers.Input(shape=(img_dims, img_dims, 3))
    base = EFNS[ef](input_shape=(img_dims, img_dims, 3), weights='imagenet', include_top=False)
    x = base(inp)
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    # Use the tf.keras Dropout like every other layer here; the bare `Dropout`
    # came from the standalone `keras` package, and mixing the two
    # implementations in one model is not supported.
    x = tf.keras.layers.Dropout(0.3)(x)
    x = tf.keras.layers.Dense(1, activation='sigmoid')(x)
    model = tf.keras.Model(inputs=inp, outputs=x)

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                  loss=tf.keras.losses.BinaryCrossentropy(label_smoothing=0.05),
                  metrics=['AUC', 'binary_accuracy'])
    return model

def train_model(epochs_number, train_gen, valid_gen, steps_per_epoch, fold_number):
    """Fit the module-level `model` for one fold and return the Keras history.

    Training uses three callbacks: the learning-rate schedule from
    `get_lr_callback`, a checkpoint that keeps the weights with the lowest
    validation loss in './FOLD<n>_model.h5', and early stopping with a
    patience of 6 epochs that restores the best weights.
    """
    early_stopping = tf.keras.callbacks.EarlyStopping(patience=6, restore_best_weights=True)

    weights_path = './FOLD' + str(fold_number+1) + '_model.h5'
    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        weights_path,
        monitor='val_loss',
        verbose=False,
        save_best_only=True,
        save_weights_only=True,
        mode='min',
        save_freq='epoch',
    )

    # get_lr_callback_CA(..., cycles=2, lr_max=3e-4) is the cosine-annealing
    # alternative to this schedule.
    lr_schedule = get_lr_callback(BATCH_SIZES[fold_number], epochs=EPOCHS[fold_number], fold_number=fold_number)

    return model.fit(
        train_gen,
        epochs=epochs_number,
        steps_per_epoch=steps_per_epoch,
        validation_data=valid_gen,
        callbacks=[lr_schedule, checkpoint, early_stopping],
        verbose=VERBOSE,
    )

In [None]:
# Per-fold accumulators for the out-of-fold (OOF) results, plus the running
# weighted sum of the test predictions of all fold models.
oof_pred = []; oof_tar = []; oof_val = []; oof_names = []; oof_folds = [] 
preds = np.zeros((count_data_items(files_test[0]), 1))

for k in range(0, FOLDS):
    # DISPLAY FOLD INFO
    # The TPU system is re-initialized at the start of every fold --
    # presumably to reset TPU state left by the previous fold's model; confirm.
    if DEVICE=='TPU':
        if tpu: tf.tpu.experimental.initialize_tpu_system(tpu)
    print('FOLD', k+1)
    print('Image Size %i with EfficientNet B%i and batch_size %i, INC2019=%i, INC(2018,2017)=%i'
          % (IMG_SIZES[k],EFF_NETS[k],BATCH_SIZES[k]*REPLICAS, INC2019[k], INC2018_2017[k]))
    print('Train/valid/test images count: ', count_data_items(files_train[k]), '/', count_data_items(files_valid[k]), '/', count_data_items(files_test[k]))

    print('Building...')
    K.clear_session()
    # The model has to be created inside the distribution strategy's scope.
    with strategy.scope():
        model = build_model(IMG_SIZES[k], EFF_NETS[k])

    print('Training...')
    # train_model() reads the global `model` assigned just above.
    history = train_model(epochs_number=EPOCHS[k], 
                          train_gen=train_folds[k], 
                          valid_gen=valid_folds[k], 
                          steps_per_epoch=count_data_items(files_train[k])/BATCH_SIZES[k]//REPLICAS, 
                          fold_number=k)

    print('Best weights loading...')
    model.load_weights('./FOLD' + str(k+1) + '_model.h5')
    
    # PREDICT OOF USING TTA
    # The validation files are read as a repeating, augmented, unshuffled
    # stream; enough steps are run to cover TTA passes over the data.
    print('Predicting OOF with TTA...')
    ds_valid = get_dataset(files_valid[k], labeled=False, return_image_names=False, augment=True, repeat=True, shuffle=False, dim=IMG_SIZES[k], batch_size=BATCH_SIZES[k] * 4)
    ct_valid = count_data_items(files_valid[k]) 
    STEPS = TTA * ct_valid / BATCH_SIZES[k] / 4 / REPLICAS
    # Trim the surplus of the last batch, then reshape column-wise so that
    # column j holds the j-th pass and the row mean averages the TTA passes.
    pred = model.predict(ds_valid, steps=STEPS, verbose=VERBOSE)[:TTA * ct_valid,] 
    oof_pred.append( np.mean(pred.reshape((ct_valid, TTA), order='F'), axis=1) )
    
    # GET OOF TARGETS AND NAMES
    # Two more passes over the same files: one labeled (targets), one
    # unlabeled (image names).
    ds_valid = get_dataset(files_valid[k], augment=False, repeat=False, dim=IMG_SIZES[k], labeled=True, return_image_names=True)
    oof_tar.append( np.array([target.numpy() for img, target in iter(ds_valid.unbatch())]) )
    oof_folds.append( np.ones_like(oof_tar[-1], dtype='int8') * k )
    ds = get_dataset(files_valid[k], augment=False, repeat=False, dim=IMG_SIZES[k], labeled=False, return_image_names=True)
    oof_names.append( np.array([img_name.numpy().decode("utf8") for img, img_name in iter(ds.unbatch())]))
    
    # PREDICT TEST USING TTA
    # Same scheme as for OOF; the averaged prediction is added to `preds`
    # with this fold's weight.
    print('Predicting Test with TTA...')
    ds_test = get_dataset(files_test[k], labeled=False, return_image_names=False, augment=True, repeat=True, shuffle=False, dim=IMG_SIZES[k], batch_size=BATCH_SIZES[k] * 4)
    ct_test = count_data_items(files_test[k])
    STEPS = TTA * ct_test / BATCH_SIZES[k] / 4 / REPLICAS
    pred = model.predict(ds_test, steps=STEPS, verbose=VERBOSE)[:TTA * ct_test,] 
    preds[:,0] += np.mean(pred.reshape((ct_test, TTA), order='F'), axis=1) * WGTS[k]
    
    # REPORT RESULTS
    # "without TTA" is the best validation AUC seen during training;
    # "with TTA" is computed from the averaged OOF predictions.
    auc = roc_auc_score(oof_tar[-1], oof_pred[-1])
    oof_val.append(np.max( history.history['val_auc'] ))
    print('OOF AUC without TTA = %.4f, with TTA = %.4f' % (oof_val[-1], auc))

    print('History ploting...')
    plt.figure(figsize=(15,5))
    plt.plot(history.history['auc'],'-o',label='Train AUC',color='#ff7f0e')
    plt.plot(history.history['val_auc'],'-o',label='Val AUC',color='#1f77b4')
    # Mark the epoch with the highest validation AUC.
    x = np.argmax( history.history['val_auc'] ); y = np.max( history.history['val_auc'] )
    xdist = plt.xlim()[1] - plt.xlim()[0]; ydist = plt.ylim()[1] - plt.ylim()[0]
    plt.scatter(x,y,s=200,color='#1f77b4'); plt.text(x-0.03*xdist,y-0.13*ydist,'max auc\n%.4f'%y,size=14)
    plt.ylabel('AUC',size=14); plt.xlabel('Epoch',size=14)
    plt.legend(loc=2)
    # Losses go on a second y axis sharing the epoch axis.
    plt2 = plt.gca().twinx()
    plt2.plot(history.history['loss'],'-o',label='Train Loss',color='#2ca02c') 
    plt2.plot(history.history['val_loss'],'-o',label='Val Loss',color='#d62728')
    # Mark the epoch with the lowest validation loss.
    x = np.argmin( history.history['val_loss'] ); y = np.min( history.history['val_loss'] )
    ydist = plt.ylim()[1] - plt.ylim()[0]
    plt.scatter(x,y,s=200,color='#d62728'); plt.text(x-0.03*xdist,y+0.05*ydist,'min loss\n%.4f'%y,size=14)
    plt.ylabel('Loss',size=14)
    plt.title('FOLD %i - Image Size %i, EfficientNet B%i'%(k+1,IMG_SIZES[k],EFF_NETS[k]),size=18)
    plt.legend(loc=3)
    plt.show() 

# Calculate OOF AUC

In [None]:
# COMPUTE OVERALL OOF AUC
# Join the per-fold arrays into one array per quantity.
oof, true, names, folds = (
    np.concatenate(per_fold)
    for per_fold in (oof_pred, oof_tar, oof_names, oof_folds)
)
auc = roc_auc_score(true, oof)
print('Overall OOF AUC with TTA = %.4f' % auc)

# SAVE OOF TO DISK
df_oof = pd.DataFrame({
    'image_name': names,
    'target': true,
    'pred': oof,
    'fold': folds,
})
df_oof.to_csv('oof.csv', index=False)
df_oof.head()

# Submission to Competition

In [None]:
# Read the test image names in file order (no shuffle, no augmentation) so
# they line up with the rows of `preds`.
ds = get_dataset(
    files_test[0],
    augment=False,
    repeat=False,
    dim=IMG_SIZES[0],
    labeled=False,
    return_image_names=True,
)
image_names = np.array([
    img_name.numpy().decode("utf-8")
    for img, img_name in iter(ds.unbatch())
])

# One row per test image, sorted by name, written as the submission file.
submission = pd.DataFrame({'image_name': image_names, 'target': preds[:,0]})
submission = submission.sort_values('image_name')
submission.to_csv('./submission.csv', index=False)

# Quick sanity check of the prediction range and distribution.
print('Max value: ', np.amax(preds[:,0]))
print('Min value: ', np.amin(preds[:,0]))
print('Mean: ', np.mean(preds[:,0]))

plt.hist(submission.target, bins=100)
plt.show()