# **PACKAGES**
Import necessary packages.

In [None]:
!pip install tensorflow-addons==0.9.1
import numpy as np
import pandas as pd
import os, math
import tensorflow as tf
import tensorflow.keras.backend as K
from kaggle_datasets import KaggleDatasets

import tensorflow_addons as tfa
from matplotlib import pyplot as plt
import matplotlib as mpl

from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam

# **TPU / GPU / CPU**

In [None]:
def get_strategy():
    
    """Detect hardware and return the matching distribution strategy.

    Tries to resolve a TPU cluster first. If that raises ValueError, looks
    for a physical GPU; if exactly one is listed a single-device strategy is
    used, otherwise TensorFlow's default strategy.

    Returns:
        A tuple ``(strategy, GCS_PATH, base_dir)``: the distribution
        strategy, the root path the TFRecords are read from (a GCS path on
        TPU, the local Kaggle input directory otherwise), and the local
        Kaggle input directory.
    """
    
    # Placeholder so `len(gpu)` below is valid even when the TPU branch is taken.
    gpu = ""
    try:
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
        print('Running on TPU', tpu.cluster_spec().as_dict()['worker'])
        
    except ValueError:
        # No TPU could be resolved: fall back to GPU detection.
        tpu = None
        # Restrict CUDA to device 0 before listing GPUs.
        os.environ["CUDA_VISIBLE_DEVICES"] = "0"
        gpu = tf.config.list_physical_devices("GPU")
        if len(gpu) == 1:
            print('Running on GPU', gpu)
    
    if tpu:
        # Connect to and initialise the TPU system before building the strategy.
        tf.config.experimental_connect_to_cluster(tpu)
        tf.tpu.experimental.initialize_tpu_system(tpu)
        strategy = tf.distribute.experimental.TPUStrategy(tpu)
        tf.config.optimizer.set_jit(True)
        print('Accelerated Linear Algebra enabled')
        # On TPU the data is read from the dataset's GCS bucket.
        GCS_PATH = KaggleDatasets().get_gcs_path('siim-isic-melanoma-classification')
        # Notebook shell magic (IPython only): list the bucket contents.
        !gsutil ls $GCS_PATH
    
    elif len(gpu) == 1:
        strategy = tf.distribute.OneDeviceStrategy(device = "/gpu:0")
        tf.config.optimizer.set_experimental_options({"auto_mixed_precision": True})
        # NOTE: this path ends with '/', and callers append '/tfrecords/...'.
        GCS_PATH = '/kaggle/input/siim-isic-melanoma-classification/'
    
    else:
        # Default distribution strategy in Tensorflow. Works on CPU and single GPU.
        strategy = tf.distribute.get_strategy()
        GCS_PATH = '/kaggle/input/siim-isic-melanoma-classification/'
    
    print("REPLICAS:", strategy.num_replicas_in_sync)
    # Local Kaggle input directory, returned regardless of the hardware found.
    base_dir = '/kaggle/input/siim-isic-melanoma-classification/'
    
    return strategy, GCS_PATH, base_dir

# Resolve the hardware once; the results are used as module-level globals below.
strategy, GCS_PATH, base_dir = get_strategy()

# DATA PIPELINE
Use TFRecords.

In [None]:
# TFRecord shards for training and testing, located under GCS_PATH.
training_files = tf.io.gfile.glob(GCS_PATH + '/tfrecords/train*.tfrec')
test_files = tf.io.gfile.glob(GCS_PATH + '/tfrecords/test*.tfrec')

# Height and width the parsers reshape every decoded image to.
IMAGE_SIZE = [1024,1024]
# Global batch size: 12 examples per replica.
BATCH_SIZE = 12 * strategy.num_replicas_in_sync

# Seed passed to the random flip ops in `transform`.
seed = 42

def parse_rec_train(data):
    """Decode one serialized labelled example into ``(image, target)``.

    Args:
        data: A serialized example holding an encoded ``"image"`` string and
            an int64 ``"target"``.

    Returns:
        A tuple of the image, reshaped to ``[*IMAGE_SIZE, 3]``, cast to
        float32 and passed through the InceptionV3 ``preprocess_input``, and
        the target cast to float32.
    """
    feature_spec = {
        "image": tf.io.FixedLenFeature([], tf.string),
        "target": tf.io.FixedLenFeature([], tf.int64),
    }
    example = tf.io.parse_single_example(data, feature_spec)

    # decode_image leaves the static shape unknown, so pin it right away.
    pixels = tf.reshape(tf.image.decode_image(example["image"]), [*IMAGE_SIZE, 3])
    pixels = tf.keras.applications.inception_v3.preprocess_input(
        tf.cast(pixels, tf.float32)
    )
    return pixels, tf.cast(example["target"], tf.float32)

# Augmentation strengths consumed by `transforms`.
# ROT_ / SHR_ scale a standard-normal draw to give rotation / shear in degrees
# (get_mat converts degrees to radians).
ROT_ = 180.0
SHR_ = 2.0
# Zoom factors are drawn as 1.0 + normal / HZOOM_ (resp. WZOOM_).
HZOOM_ = 3.0
WZOOM_ = 3.0
# HSHIFT_ / WSHIFT_ scale a standard-normal draw to give the shift entries.
HSHIFT_ = 5.0
WSHIFT_ = 5.0

def get_mat(rotation, shear, height_zoom, width_zoom, height_shift, width_shift):
    """Return the 3x3 matrix used to transform pixel indices.

    The result is ``(rotation @ shear) @ (zoom @ shift)``.

    Args:
        rotation: Rotation angle in degrees.
        shear: Shear angle in degrees.
        height_zoom: Zoom factor; the matrix holds its reciprocal.
        width_zoom: Zoom factor; the matrix holds its reciprocal.
        height_shift: Shift entry placed in the first row, last column.
        width_shift: Shift entry placed in the second row, last column.

    Returns:
        A ``[3, 3]`` float tensor.
    """
    # Degrees -> radians.
    rotation = math.pi * rotation / 180.
    shear = math.pi * shear / 180.

    one = tf.constant([1], dtype='float32')
    zero = tf.constant([0], dtype='float32')

    def as_3x3(entries):
        # Pack nine entries, given in row-major order, into a [3, 3] tensor.
        return tf.reshape(tf.concat([entries], axis=0), [3, 3])

    # Rotation.
    cos_r = tf.math.cos(rotation)
    sin_r = tf.math.sin(rotation)
    rotate = as_3x3([
        cos_r,  sin_r, zero,
        -sin_r, cos_r, zero,
        zero,   zero,  one,
    ])

    # Shear.
    cos_s = tf.math.cos(shear)
    sin_s = tf.math.sin(shear)
    skew = as_3x3([
        one,  sin_s, zero,
        zero, cos_s, zero,
        zero, zero,  one,
    ])

    # Zoom (reciprocal factors on the diagonal).
    zoom = as_3x3([
        one/height_zoom, zero,           zero,
        zero,            one/width_zoom, zero,
        zero,            zero,           one,
    ])

    # Shift.
    shift = as_3x3([
        one,  zero, height_shift,
        zero, one,  width_shift,
        zero, zero, one,
    ])

    rotate_then_skew = K.dot(rotate, skew)
    zoom_then_shift = K.dot(zoom, shift)
    return K.dot(rotate_then_skew, zoom_then_shift)

def transforms(image, DIM=1024):    
    """Randomly rotate, shear, zoom and shift a single square image.

    Every destination pixel index is mapped through a random 3x3 matrix from
    `get_mat` to a source index, truncated to int32, clipped to the valid
    range, and the source pixel is gathered from `image`.

    Args:
        image: One image (not a batch); indexed as [row, col] with 3 channels.
        DIM: Side length of the image. Assumed to match the image's actual
            height and width — TODO confirm against IMAGE_SIZE at call sites.

    Returns:
        A tensor reshaped to ``[DIM, DIM, 3]``.
    """
    # input image - is one image of size [dim,dim,3] not a batch of [b,dim,dim,3]
    # output - image randomly rotated, sheared, zoomed, and shifted
    XDIM = DIM%2 #fix for size 331
    
    # Each parameter is an independent standard-normal draw, scaled by the
    # module-level strength constants.
    rot = ROT_ * tf.random.normal([1], dtype='float32')
    shr = SHR_ * tf.random.normal([1], dtype='float32') 
    # NOTE(review): the zoom factors are unbounded normal draws and can land
    # near zero or below; get_mat divides by them — confirm this is acceptable.
    h_zoom = 1.0 + tf.random.normal([1], dtype='float32') / HZOOM_
    w_zoom = 1.0 + tf.random.normal([1], dtype='float32') / WZOOM_
    h_shift = HSHIFT_ * tf.random.normal([1], dtype='float32') 
    w_shift = WSHIFT_ * tf.random.normal([1], dtype='float32') 

    # GET TRANSFORMATION MATRIX
    m = get_mat(rot,shr,h_zoom,w_zoom,h_shift,w_shift) 

    # LIST DESTINATION PIXEL INDICES
    # Coordinates are centred: x runs DIM//2 .. -DIM//2+1, y runs -DIM//2 .. DIM//2-1.
    x   = tf.repeat(tf.range(DIM//2, -DIM//2,-1), DIM)
    y   = tf.tile(tf.range(-DIM//2, DIM//2), [DIM])
    # Third coordinate of ones so the 3x3 matrix can apply the shift column.
    z   = tf.ones([DIM*DIM], dtype='int32')
    idx = tf.stack( [x,y,z] )
    
    # ROTATE DESTINATION PIXELS ONTO ORIGIN PIXELS
    idx2 = K.dot(m, tf.cast(idx, dtype='float32'))
    idx2 = K.cast(idx2, dtype='int32')
    # Clamp indices that the transform pushed outside the image.
    idx2 = K.clip(idx2, -DIM//2+XDIM+1, DIM//2)
    
    # FIND ORIGIN PIXEL VALUES           
    # Convert centred coordinates back to [row, col] array indices.
    idx3 = tf.stack([DIM//2-idx2[0,], DIM//2-1+idx2[1,]])
    d    = tf.gather_nd(image, tf.transpose(idx3))
        
    return tf.reshape(d,[DIM, DIM,3])

def transform(image, target):
    """Apply random augmentation to one training example.

    Runs the affine `transforms`, then a random multiple-of-90-degree
    rotation and random horizontal / vertical flips.

    Args:
        image: One image tensor of shape ``[*IMAGE_SIZE, 3]``.
        target: The label; returned unchanged.

    Returns:
        The augmented image, reshaped to ``[*IMAGE_SIZE, 3]``, and `target`.
    """
    image = transforms(image)

    # Draw the rotation count inside the graph. A NumPy draw would be
    # evaluated once, when tf.data traces this function, and the same
    # rotation would then be applied to every image.
    quarter_turns = tf.random.uniform([], minval=0, maxval=4, dtype=tf.int32)
    image = tf.image.rot90(image, k=quarter_turns)

    # Use distinct op-level seeds so the two flips are not driven by the
    # same random sequence.
    image = tf.image.random_flip_left_right(image, seed=seed)
    image = tf.image.random_flip_up_down(image, seed=seed + 1)

    # rot90 with a tensor `k` loses the static shape; restore it.
    image = tf.reshape(image, [*IMAGE_SIZE, 3])
    return image, target

# OVERSAMPLING

In [None]:
from collections import Counter

# Let tf.data pick parallelism / prefetch sizes.
AUTO = tf.data.experimental.AUTOTUNE

# 80/20 train/validation split, done by TFRecord file (not by example).
split = int(len(training_files) * 0.8)
training_filenames = training_files[:split]
validation_filenames = training_files[split:]

def get_num_of_repetition_for_example(training_example, positive_repeats=15, negative_repeats=1):
    """Return how many times an example should be repeated when oversampling.

    Args:
        training_example: A pair ``(image, label)``; only the label is read.
        positive_repeats: Repetitions for examples whose label equals 1.0.
        negative_repeats: Repetitions for all other examples.

    Returns:
        A scalar int64 tensor with the repetition count.
    """
    _, label = training_example

    # tf.where keeps the branch inside the graph, so this works both eagerly
    # and inside tf.data functions without relying on AutoGraph rewriting a
    # Python `if` on a tensor.
    num_to_repeat = tf.where(tf.equal(label, 1.0), positive_repeats, negative_repeats)

    return tf.cast(num_to_repeat, tf.int64)

# Parsed training examples, with each example repeated according to its label
# (oversampling of the positive class).
train_dataset = tf.data.TFRecordDataset(training_filenames, num_parallel_reads = AUTO).map(parse_rec_train, num_parallel_calls = AUTO)
train_dataset = train_dataset.flat_map(lambda image, label: tf.data.Dataset.from_tensors((image, label)).repeat(get_num_of_repetition_for_example((image, label))))


def _parse_target_only(data):
    """Read just the label of a serialized example, skipping image decoding."""
    parsed = tf.io.parse_single_example(data, {"target": tf.io.FixedLenFeature([], tf.int64)})
    return tf.cast(parsed["target"], tf.float32)


# Count labels from a label-only view of the same files: iterating
# `train_dataset` itself would decode every full-size image (and every
# oversampled copy) only to read its label.
label_only_dataset = tf.data.TFRecordDataset(training_filenames, num_parallel_reads = AUTO).map(_parse_target_only, num_parallel_calls = AUTO)
raw_label_counter = Counter(float(label.numpy()) for label in label_only_dataset)

# Scale the raw counts by the per-label repetition factor so the totals match
# the oversampled dataset.
label_counter = Counter()
for label_value, raw_count in raw_label_counter.items():
    repeats = get_num_of_repetition_for_example((None, tf.constant(label_value, tf.float32)))
    label_counter[label_value] = raw_count * int(repeats.numpy())

TRAIN_SIZE = sum(label_counter.values())
print("Number of examples in the oversampled training dataset: {}".format(TRAIN_SIZE))

print("Number of positive train examples: {}".format(label_counter[1.0])) 
print("Number of negative train examples: {}".format(label_counter[0.0]))

# val_counter = Counter()

# val_ds = tf.data.TFRecordDataset(validation_filenames, num_parallel_reads = AUTO).map(parse_rec_train, num_parallel_calls = AUTO)

# for images, labels in val_ds:
#     val_counter.update([labels.numpy()])

# print("Number of positive val examples: {}".format(val_counter[1.0])) # 187
# print("Number of negative val examples: {}".format(val_counter[0.0])) # 10158

# del val_ds

# Batches needed to see the oversampled training set once (rounded up).
STEPS_PER_EPOCH = math.ceil(TRAIN_SIZE / BATCH_SIZE)

Define a learning-rate schedule for the `LearningRateScheduler` callback. Also add an `EarlyStopping` callback to stop training when the monitored metric has stopped improving.

In [None]:
# LR_START = 0.00001
# LR_MAX = 0.000005 * strategy.num_replicas_in_sync
# LR_MIN = 0.00001
# LR_RAMPUP_EPOCHS = 8
# LR_SUSTAIN_EPOCHS = 0
# LR_EXP_DECAY = 0.8

def scheduler(epoch):
    """Return the learning rate for `epoch`: constant, then exponential decay.

    The rate is 0.001 for the first five epochs and is then multiplied by
    ``exp(-0.1)`` for every further epoch.

    Args:
        epoch: Zero-based epoch index.

    Returns:
        The learning rate as a plain Python float, which is what
        ``LearningRateScheduler`` expects from its schedule function.
    """
    if epoch < 5:
        return 0.001
    # math.exp keeps the result a float; tf.math.exp would return a tensor.
    return 0.001 * math.exp(0.1 * (5 - epoch))
    
# Learning rate starts at 3e-4 and is multiplied by 0.70 every three epochs.
lr_callback = tf.keras.callbacks.LearningRateScheduler(
    lambda epoch: 3e-4 * (0.70 ** (epoch/3)),
    verbose=True,
)

# Stop once validation AUC has not improved for 15 epochs and roll back to
# the best weights seen.
es_callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_auc',
    mode='max',
    patience=15,
    restore_best_weights=True,
    verbose=1,
)

> # **MODEL 1 & TRAINING**
Instantiating the model in the strategy scope creates the model on the TPU. Train the model with the initial bias.

In [None]:
# Training pipeline: augment each (oversampled) example, repeat indefinitely,
# shuffle, batch and prefetch.
train_dataset = train_dataset.map(transform, num_parallel_calls = AUTO)
# NOTE(review): the shuffle buffer holds 10000 decoded float32 images of
# IMAGE_SIZE x 3 (about 12 MiB each at 1024x1024) — confirm this fits in host
# memory, or shrink the buffer.
train_dataset = train_dataset.repeat().shuffle(10000)
train_dataset = train_dataset.batch(BATCH_SIZE).prefetch(AUTO)
    
# Validation pipeline: parsed examples only, no augmentation or shuffling.
val_dataset = tf.data.TFRecordDataset(validation_filenames, num_parallel_reads = AUTO).map(parse_rec_train, num_parallel_calls = AUTO)
val_dataset = val_dataset.batch(BATCH_SIZE)
val_dataset = val_dataset.prefetch(AUTO)


# Build, compile and train model 1 (InceptionV3 backbone) under the
# distribution strategy.
with strategy.scope():
    # ImageNet-pretrained backbone without its classifier head; pooling = 'avg'
    # is requested from the backbone, so no separate pooling layer is added.
    base_model1 = tf.keras.applications.InceptionV3(weights = 'imagenet', include_top = False, pooling = 'avg', input_shape = (1024, 1024, 3))
    
    
#     for layer in base_model1.layers:
#         layer.trainable = False
        
    # Fine-tune the whole backbone; batch-norm layers get momentum 0.9.
    for layer in base_model1.layers:
        layer.trainable = True
        if isinstance(layer, tf.keras.layers.BatchNormalization):
            layer.momentum = 0.9
    
    # ...except the non-batch-norm layers among the first ten, which are frozen.
    for layer in base_model1.layers[:10]:
        if not isinstance(layer, tf.keras.layers.BatchNormalization):
            layer.trainable = False
    
    # Classification head: one wide ReLU layer, then a single sigmoid unit.
    x = base_model1.output
#     x = tf.keras.layers.GlobalAveragePooling2D()(x)
#     x = tf.keras.layers.Dense(1000, activation = 'relu')(x)
    x = tf.keras.layers.Dense(3000, activation = 'relu')(x)
#     x = tf.keras.layers.Dropout(0.25)(x)
    predictions = tf.keras.layers.Dense(1, activation='sigmoid')(x)
    
    model1 = tf.keras.Model(inputs = base_model1.input, outputs = predictions)
    
    # NOTE(review): AUC and Precision are created without explicit names; the
    # plotting cell and es_callback expect 'auc' / 'precision' / 'val_auc',
    # which presumably holds only when these are the first such metrics
    # created in the session — confirm when re-running cells.
    model1.compile(
        optimizer = tf.keras.optimizers.Nadam(),
        loss = tfa.losses.SigmoidFocalCrossEntropy(alpha = 0.9, gamma = 2.0),
#         loss = tf.keras.losses.BinaryCrossentropy(),
        metrics = [tf.keras.metrics.AUC(), tf.keras.metrics.Precision(), tf.keras.metrics.BinaryAccuracy()]
    )

    model1.summary()
    
    # train_dataset repeats forever, so steps_per_epoch defines the epoch length.
    history1 = model1.fit(train_dataset,
                        epochs = 15,
                        steps_per_epoch = STEPS_PER_EPOCH,
                        callbacks = [lr_callback, es_callback],
                        validation_data = val_dataset)

# **MODEL EVALUATION**
Plot the metrics to visualize the model's performance.

In [None]:
# Default figure size and the colour cycle used by plot_metrics.
mpl.rcParams['figure.figsize'] = (12, 10)
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

# History keys to plot for model 1 (each also needs a 'val_' counterpart).
metrics1 = ['loss', 'auc', 'precision', 'binary_accuracy']

def plot_metrics(history, metrics):
    """Plot training and validation curves for each metric in `metrics`.

    One subplot is drawn per metric on a two-column grid. The grid is 2x2
    for up to four metrics and gains rows beyond that, so longer metric
    lists no longer overflow a fixed 2x2 layout.

    Args:
        history: A Keras ``History`` object. Its ``'loss'`` and
            ``'val_loss'`` entries are replaced in place by their
            per-epoch means.
        metrics: Keys of ``history.history`` to plot; each must also have a
            ``'val_' + key`` entry.
    """
    # Reduce each epoch's loss entry to a scalar before plotting
    # (presumably because the loss can be logged as an array — TODO confirm).
    for key in ('loss', 'val_loss'):
        history.history[key] = [np.mean(value) for value in history.history[key]]

    n_cols = 2
    n_rows = max(2, (len(metrics) + n_cols - 1) // n_cols)

    for n, metric in enumerate(metrics):
        name = metric.replace("_", " ").upper()
        plt.subplot(n_rows, n_cols, n + 1)
        # Same colour for both curves; validation is dashed.
        plt.plot(history.epoch, history.history[metric], color = colors[0], label = 'Train')
        plt.plot(history.epoch, history.history['val_' + metric], color = colors[0], linestyle = "--", label = 'Val')
        plt.xlabel('Epoch')
        plt.ylabel(name)
        plt.legend()

# Plot training vs. validation curves for model 1.
plot_metrics(history1, metrics1)

# **PREDICTIONS 1**
We now get the prediction probabilities for our test images. Since we are splitting the dataset and iterating separately on images and ids, order matters.

In [None]:
def parse_rec_test(data):
    """Decode one serialized unlabelled example into ``(image, image_name)``.

    Args:
        data: A serialized example holding an encoded ``"image"`` string and
            an ``"image_name"`` string.

    Returns:
        A tuple of the image, cast to float32, passed through the
        InceptionV3 ``preprocess_input`` and reshaped to
        ``[*IMAGE_SIZE, 3]``, and the image name as a string tensor.
    """
    feature_spec = {
        "image": tf.io.FixedLenFeature([], tf.string),
        "image_name": tf.io.FixedLenFeature([], tf.string),
    }
    example = tf.io.parse_single_example(data, feature_spec)

    pixels = tf.cast(tf.image.decode_image(example["image"]), tf.float32)
    pixels = tf.keras.applications.inception_v3.preprocess_input(pixels)
    # decode_image leaves the static shape unknown; pin it for the model.
    pixels = tf.reshape(pixels, [*IMAGE_SIZE, 3])
    return pixels, example["image_name"]

print('Computing probabilities...')
# Batched (image, image_name) pairs from the test shards.
test_ds = tf.data.TFRecordDataset(test_files, num_parallel_reads = AUTO)
# tf.data.Options() is passed with its defaults left untouched.
test_ds = test_ds.with_options(tf.data.Options()).map(parse_rec_test, num_parallel_calls = AUTO)
test_ds = test_ds.batch(BATCH_SIZE).prefetch(AUTO)
# Image-only view for predict(); the ids are read from test_ds separately later.
test_images = test_ds.map(lambda image, idnum: image)

probabilities1 = model1.predict(test_images)
# Keep column 0 of the prediction array (the model has a single sigmoid output).
probabilities1 = probabilities1[:,0]

# MODEL 2 & TRAINING

In [None]:
# Build, compile and train model 2 (frozen Xception backbone) under the
# distribution strategy, then plot its curves and predict on the test set.
with strategy.scope():
    base_model2 = tf.keras.applications.Xception(weights = 'imagenet', include_top = False, pooling = 'max', input_shape = (1024, 1024, 3))

    # The backbone is used as a fixed feature extractor; only the head trains.
    for layer in base_model2.layers:
        layer.trainable = False

    # Classification head: one ReLU layer, then a single sigmoid unit.
    x = base_model2.output
    x = tf.keras.layers.Dense(500, activation = 'relu')(x)
    predictions = tf.keras.layers.Dense(1, activation='sigmoid')(x)

    model2 = tf.keras.Model(inputs = base_model2.input, outputs = predictions)

    # Name the metrics explicitly. Auto-generated names get a numeric suffix
    # for every metric object created after the first ('auc_1', ...), which
    # makes the history keys depend on cell execution order and means
    # es_callback's 'val_auc' does not exist for this model.
    model2.compile(
        optimizer = tf.keras.optimizers.RMSprop(),
        loss = tfa.losses.SigmoidFocalCrossEntropy(alpha = 0.9, gamma = 2.0),
        metrics = [
            tf.keras.metrics.AUC(name = 'auc'),
            tf.keras.metrics.Precision(name = 'precision'),
            tf.keras.metrics.BinaryAccuracy(name = 'binary_accuracy'),
        ]
    )

    # train_dataset repeats forever, so steps_per_epoch defines the epoch length.
    history2 = model2.fit(train_dataset,
                        epochs = 15,
                        steps_per_epoch = STEPS_PER_EPOCH,
                        callbacks = [lr_callback, es_callback],
                        validation_data = val_dataset)

# Keys match the explicit metric names given at compile time.
metrics2 = ['loss', 'auc', 'precision', 'binary_accuracy']

plot_metrics(history2, metrics2)

probabilities2 = model2.predict(test_images)
# Keep column 0 of the prediction array (the model has a single sigmoid output).
probabilities2 = probabilities2[:,0]

# Ensemble: average the two models' probabilities and threshold them.
probabilities = (probabilities1 + probabilities2) / 2
threshold = 0.5
predictions = (probabilities>threshold)*1

# Calculate softmax and get the maximum.
# from scipy.special import softmax
# prob = np.column_stack((probabilities1, probabilities2))
# prob = softmax(prob, axis = 1)
# prob = np.mean(prob, axis = 1)
# prob = probabilities2
# predictions = (prob > 0.2)*1

# Format the numbers instead of concatenating them to str, which raises TypeError.
print("Threshold: {}".format(threshold))
print("Number of positive predictions: {}".format(int(predictions.sum())))

print('Generating submission.csv file...')
test_ids_ds = test_ds.map(lambda image, idnum: idnum).unbatch()
# Pull every id in a single batch, sized from the number of predictions
# rather than a hard-coded test-set size, so ids and predictions line up.
test_ids = next(iter(test_ids_ds.batch(len(probabilities)))).numpy().astype('U') # all in one batch
# np.savetxt('submissionprob.csv', np.rec.fromarrays([test_ids, probabilities]), fmt=['%s', '%d'], delimiter=',', header='image_name,target', comments='')
np.savetxt('submission.csv', np.rec.fromarrays([test_ids, predictions]), fmt=['%s', '%d'], delimiter=',', header='image_name,target', comments='')