In [None]:
!pip install -q efficientnet

In [None]:
import os, random, re, math, time
random.seed(a=42)
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd

import tensorflow as tf
import tensorflow.keras.backend as K
import efficientnet.tfkeras as efn

import PIL

from kaggle_datasets import KaggleDatasets

from tqdm import tqdm

In [None]:
# Preferred accelerator. The strategy-setup cell rewrites this to "GPU" when no
# TPU can be reached.
DEVICE = "TPU"

# Central experiment configuration shared by the input pipeline, the augmentation
# code, the learning-rate schedule and the training loop.
CFG = {
    # --- ensemble / batching -------------------------------------------------
    "net_count": 7,            # number of networks in the ensemble
    "batch_size": 32,          # multiplied by the replica count when batching

    # --- image geometry (pixels) ---------------------------------------------
    "read_size": 256,          # side length after decoding + first resize
    "crop_size": 256,          # side length of the random / central crop
    "net_size": 256,           # side length of the final resize

    # --- learning-rate schedule (see get_lr_callback) ------------------------
    "LR_START": 5e-6,
    "LR_MAX": 2e-5,            # multiplied by the replica count in get_lr_callback
    "LR_MIN": 1e-6,
    "LR_RAMPUP_EPOCHS": 5,
    "LR_SUSTAIN_EPOCHS": 0,
    "LR_EXP_DECAY": 0.8,
    "epochs": 15,

    # --- random affine augmentation strength (see transform) -----------------
    "rot": 180.0,              # scale of the random rotation, in degrees
    "shr": 2.0,                # scale of the random shear, in degrees
    "hzoom": 8.0,              # height zoom is 1 + N(0, 1) / hzoom
    "wzoom": 8.0,              # width zoom is 1 + N(0, 1) / wzoom
    "hshift": 8.0,             # scale of the random vertical shift
    "wshift": 8.0,             # scale of the random horizontal shift

    # --- settings not read by any code visible in this notebook --------------
    # NOTE(review): presumably left over from an earlier version - confirm.
    "optimizer": "adam",
    "label_smooth_fac": 0.05,
    "tta_steps": 25,
}

In [None]:
BASEPATH = "../input/siim-isic-melanoma-classification"
df_train = pd.read_csv(os.path.join(BASEPATH, 'train.csv'))
df_test  = pd.read_csv(os.path.join(BASEPATH, 'test.csv'))
df_sub   = pd.read_csv(os.path.join(BASEPATH, 'sample_submission.csv'))

GCS_PATH    = KaggleDatasets().get_gcs_path('melanoma-256x256')
files_train = np.sort(np.array(tf.io.gfile.glob(GCS_PATH + '/train*.tfrec')))
files_test  = np.sort(np.array(tf.io.gfile.glob(GCS_PATH + '/test*.tfrec')))

In [None]:
# Hold out 20% of the training shards as validation data.
# The split starts from a fresh listing of all training shards instead of from
# `files_train` itself, so re-running this cell yields the same split rather than
# splitting an already reduced training set a second time.
files_train_all = np.sort(np.array(tf.io.gfile.glob(GCS_PATH + '/train*.tfrec')))
files_train, files_val = train_test_split(files_train_all, test_size=0.2, random_state=42)

In [None]:
# Pick the tf.distribute strategy: a TPU strategy when a TPU is reachable and can
# be initialised, otherwise the default (CPU / single GPU) strategy.
if DEVICE == "TPU":
    print("connecting to TPU...")
    try:
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
        print('Running on TPU ', tpu.master())
    except ValueError:
        print("Could not connect to TPU")
        tpu = None

    if tpu:
        try:
            print("initializing  TPU ...")
            tf.config.experimental_connect_to_cluster(tpu)
            tf.tpu.experimental.initialize_tpu_system(tpu)
            strategy = tf.distribute.experimental.TPUStrategy(tpu)
            print("TPU initialized")
        except Exception as err:
            # The original `except _:` referenced an undefined name, so any
            # initialisation error surfaced as an unrelated failure. It also left
            # `strategy` unset while DEVICE stayed "TPU", which broke the
            # REPLICAS line below. Report the cause and fall back instead.
            print("failed to initialize TPU")
            print(repr(err))
            DEVICE = "GPU"
    else:
        DEVICE = "GPU"

if DEVICE != "TPU":
    print("Using default strategy for CPU and single GPU")
    strategy = tf.distribute.get_strategy()

if DEVICE == "GPU":
    print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))


# AUTO lets tf.data choose parallelism / prefetch sizes; REPLICAS scales the
# global batch size and the peak learning rate.
AUTO     = tf.data.experimental.AUTOTUNE
REPLICAS = strategy.num_replicas_in_sync
print(f'REPLICAS: {REPLICAS}')

In [None]:
def get_mat(rotation, shear, height_zoom, width_zoom, height_shift, width_shift):
    """Build the 3x3 matrix that `transform` applies to pixel index coordinates.

    Args:
        rotation: rotation angle in degrees (converted to radians below).
        shear: shear angle in degrees (converted to radians below).
        height_zoom, width_zoom: zoom factors; their reciprocals are placed on
            the diagonal of the zoom matrix.
        height_shift, width_shift: offsets placed in the last column of the
            shift matrix.

    Returns:
        The product (rotation . shear) . (zoom . shift), reshaped to [3, 3].

    NOTE(review): `transform` passes every argument as a float32 tensor of
    shape [1]; other input shapes are presumably unsupported - confirm.
    """

    # CONVERT DEGREES TO RADIANS
    rotation = math.pi * rotation / 180.
    shear    = math.pi * shear    / 180.

    def get_3x3_mat(lst):
        # Packs a flat list of nine entries (row-major) into a [3, 3] tensor.
        return tf.reshape(tf.concat([lst],axis=0), [3,3])

    # ROTATION MATRIX
    c1   = tf.math.cos(rotation)
    s1   = tf.math.sin(rotation)
    one  = tf.constant([1],dtype='float32')
    zero = tf.constant([0],dtype='float32')

    rotation_matrix = get_3x3_mat([c1,   s1,   zero, 
                                   -s1,  c1,   zero, 
                                   zero, zero, one])
    # SHEAR MATRIX
    c2 = tf.math.cos(shear)
    s2 = tf.math.sin(shear)

    shear_matrix = get_3x3_mat([one,  s2,   zero, 
                                zero, c2,   zero, 
                                zero, zero, one])
    # ZOOM MATRIX
    zoom_matrix = get_3x3_mat([one/height_zoom, zero,           zero, 
                               zero,            one/width_zoom, zero, 
                               zero,            zero,           one])
    # SHIFT MATRIX
    shift_matrix = get_3x3_mat([one,  zero, height_shift, 
                                zero, one,  width_shift, 
                                zero, zero, one])

    # Combine the four elementary transforms into a single matrix.
    return K.dot(K.dot(rotation_matrix, shear_matrix), 
                 K.dot(zoom_matrix,     shift_matrix))


def transform(image, cfg):    
    """Apply a random rotation, shear, zoom and shift to a single image.

    Args:
        image: one image of size [dim, dim, 3] (not a batch of [b, dim, dim, 3]).
            NOTE(review): dim is assumed to equal cfg['read_size'], since the
            result is reshaped to that size - confirm against callers.
        cfg: configuration mapping; reads 'read_size', 'rot', 'shr', 'hzoom',
            'wzoom', 'hshift' and 'wshift'.

    Returns:
        The transformed image, reshaped to [read_size, read_size, 3].
    """
    DIM = cfg["read_size"]
    XDIM = DIM%2 #fix for size 331

    # Draw the augmentation parameters: each is a standard-normal sample scaled
    # by (or, for the zooms, divided by) the matching cfg value.
    rot = cfg['rot'] * tf.random.normal([1], dtype='float32')
    shr = cfg['shr'] * tf.random.normal([1], dtype='float32') 
    h_zoom = 1.0 + tf.random.normal([1], dtype='float32') / cfg['hzoom']
    w_zoom = 1.0 + tf.random.normal([1], dtype='float32') / cfg['wzoom']
    h_shift = cfg['hshift'] * tf.random.normal([1], dtype='float32') 
    w_shift = cfg['wshift'] * tf.random.normal([1], dtype='float32') 

    # GET TRANSFORMATION MATRIX
    m = get_mat(rot,shr,h_zoom,w_zoom,h_shift,w_shift) 

    # LIST DESTINATION PIXEL INDICES
    # Coordinates are centred on the image middle; z is the homogeneous
    # coordinate (all ones) so the shift column of `m` takes effect.
    x   = tf.repeat(tf.range(DIM//2, -DIM//2,-1), DIM)
    y   = tf.tile(tf.range(-DIM//2, DIM//2), [DIM])
    z   = tf.ones([DIM*DIM], dtype='int32')
    idx = tf.stack( [x,y,z] )

    # ROTATE DESTINATION PIXELS ONTO ORIGIN PIXELS
    idx2 = K.dot(m, tf.cast(idx, dtype='float32'))
    # Casting to int32 picks a single source pixel per destination pixel
    # (no interpolation).
    idx2 = K.cast(idx2, dtype='int32')
    # Clamp so the row/column indices derived below stay inside the image.
    idx2 = K.clip(idx2, -DIM//2+XDIM+1, DIM//2)

    # FIND ORIGIN PIXEL VALUES
    # Convert the centred coordinates back to array row/column indices.
    idx3 = tf.stack([DIM//2-idx2[0,], DIM//2-1+idx2[1,]])
    d    = tf.gather_nd(image, tf.transpose(idx3))

    return tf.reshape(d,[DIM, DIM,3])

In [None]:
def read_labeled_tfrecord(example):
    """Parse one serialized labelled record into ``(image, target)``.

    Args:
        example: a serialized ``tf.train.Example``.

    Returns:
        A pair of the raw ``'image'`` string feature and the int64 ``'target'``.
        ``'image_name'`` is parsed as well (so it must be present) but is not
        returned.
    """
    # Further per-record fields (patient_id, sex, age_approx,
    # anatom_site_general_challenge, diagnosis) were commented out in the
    # original feature spec and are therefore not parsed here.
    scalar_string = tf.io.FixedLenFeature([], tf.string)
    scalar_int64 = tf.io.FixedLenFeature([], tf.int64)
    feature_spec = {
        'image': scalar_string,
        'image_name': scalar_string,
        'target': scalar_int64,
    }
    parsed = tf.io.parse_single_example(example, feature_spec)
    image_bytes, target = parsed['image'], parsed['target']
    return image_bytes, target


def read_unlabeled_tfrecord(example, return_image_name):
    """Parse one serialized unlabelled record.

    Args:
        example: a serialized ``tf.train.Example``.
        return_image_name: when truthy, the second element of the result is the
            ``'image_name'`` feature; otherwise it is the constant ``0``.

    Returns:
        ``(image, image_name_or_0)`` where ``image`` is the raw ``'image'``
        string feature.
    """
    feature_spec = {
        key: tf.io.FixedLenFeature([], tf.string)
        for key in ('image', 'image_name')
    }
    parsed = tf.io.parse_single_example(example, feature_spec)

    if return_image_name:
        second = parsed['image_name']
    else:
        second = 0
    return parsed['image'], second

 
def prepare_image(img, cfg=None, augment=True):
    """Decode a JPEG string into a float image of the network input size.

    Args:
        img: JPEG-encoded image (passed to ``tf.image.decode_jpeg``).
        cfg: configuration mapping with 'read_size', 'crop_size' and
            'net_size'. Despite the ``None`` default it must be supplied,
            because it is indexed unconditionally.
        augment: when true, apply the random affine transform, a random crop,
            a random horizontal flip and random hue / saturation / contrast /
            brightness changes; otherwise take a central crop.

    Returns:
        A float32 tensor of shape [net_size, net_size, 3]; pixel values are
        divided by 255 right after decoding.
    """
    read_side = cfg['read_size']
    net_side = cfg['net_size']

    pixels = tf.image.decode_jpeg(img, channels=3)
    pixels = tf.image.resize(pixels, [read_side, read_side])
    pixels = tf.cast(pixels, tf.float32) / 255.0

    if not augment:
        pixels = tf.image.central_crop(pixels, cfg['crop_size'] / read_side)
    else:
        crop_side = cfg['crop_size']
        # Geometric augmentation first, then colour jitter.
        pixels = transform(pixels, cfg)
        pixels = tf.image.random_crop(pixels, [crop_side, crop_side, 3])
        pixels = tf.image.random_flip_left_right(pixels)
        pixels = tf.image.random_hue(pixels, 0.01)
        pixels = tf.image.random_saturation(pixels, 0.7, 1.3)
        pixels = tf.image.random_contrast(pixels, 0.8, 1.2)
        pixels = tf.image.random_brightness(pixels, 0.1)

    pixels = tf.image.resize(pixels, [net_side, net_side])
    # The reshape pins the static shape of the result.
    return tf.reshape(pixels, [net_side, net_side, 3])

def count_data_items(filenames):
    """Sum the per-shard item counts encoded in TFRecord file names.

    Each file name is expected to contain ``-<count>.`` (for example
    ``train00-2071.tfrec`` holds 2071 items).

    Args:
        filenames: iterable of shard paths (local or ``gs://`` style).

    Returns:
        The total item count as a NumPy int64 (``0`` for an empty iterable).

    Raises:
        ValueError: if a file name does not contain a ``-<count>.`` part.
    """
    count_pattern = re.compile(r"-([0-9]+)\.")  # compiled once, not once per file

    counts = []
    for filename in filenames:
        # Look at the file name only, so that a '-<digits>.' sequence in a
        # bucket or directory name cannot be mistaken for the item count.
        base = os.path.basename(str(filename))
        match = count_pattern.search(base)
        if match is None:
            raise ValueError(
                f"cannot read item count from file name {filename!r}; "
                "expected a name like 'train00-2071.tfrec'"
            )
        counts.append(int(match.group(1)))

    # An explicit dtype keeps the result integral even when `counts` is empty.
    return np.sum(counts, dtype=np.int64)

In [None]:
def get_dataset(files, cfg, augment = False, shuffle = False, repeat = False, 
                labeled=True, return_image_names=True):
    """Build the batched ``tf.data`` input pipeline for a list of TFRecord files.

    Args:
        files: TFRecord file paths.
        cfg: configuration mapping; 'batch_size' is read here and the whole
            mapping is forwarded to ``prepare_image``.
        augment: forwarded to ``prepare_image``.
        shuffle: shuffle with an 8192-element buffer and allow
            non-deterministic element order.
        repeat: repeat the dataset indefinitely.
        labeled: parse records with ``read_labeled_tfrecord`` (image, target)
            instead of ``read_unlabeled_tfrecord``.
        return_image_names: for unlabelled data, whether the second element is
            the image name (otherwise it is 0).

    Returns:
        A dataset of ``(image_batch, label_or_name_batch)`` with
        ``cfg['batch_size'] * REPLICAS`` elements per batch.
    """
    # Raw serialized records are cached before any decoding / augmentation.
    records = tf.data.TFRecordDataset(files, num_parallel_reads=AUTO).cache()

    if repeat:
        records = records.repeat()

    if shuffle:
        options = tf.data.Options()
        options.experimental_deterministic = False
        records = records.shuffle(1024*8).with_options(options)

    if labeled:
        parse_fn = read_labeled_tfrecord
    else:
        parse_fn = lambda example: read_unlabeled_tfrecord(example, return_image_names)

    def decode_fn(img, imgname_or_label):
        return prepare_image(img, augment=augment, cfg=cfg), imgname_or_label

    return (
        records
        .map(parse_fn, num_parallel_calls=AUTO)
        .map(decode_fn, num_parallel_calls=AUTO)
        .batch(cfg['batch_size'] * REPLICAS)
        .prefetch(AUTO)
    )

## Test the input pipeline
Before training any neural network, I always sanity-check the input pipeline.
Here are some images from the training data.

In [None]:
def show_dataset(thumb_size, cols, rows, ds):
    """Display the first ``cols * rows`` images of ``ds`` as a thumbnail mosaic.

    Args:
        thumb_size: side length of each thumbnail in pixels.
        cols: number of thumbnails per row.
        rows: number of thumbnail rows.
        ds: iterable of ``(image, target_or_image_id)`` pairs, one image per
            element (i.e. un-batched). Images must offer ``.numpy()`` and are
            scaled by 255, so values are expected in [0, 1]. The second
            element is ignored.
    """
    # `import PIL` on its own does not guarantee that the `PIL.Image`
    # submodule is loaded, so import it explicitly.
    import PIL.Image

    # Thumbnails are separated by a one-pixel gap.
    mosaic = PIL.Image.new(mode='RGB', size=(thumb_size*cols + (cols-1), 
                                             thumb_size*rows + (rows-1)))

    capacity = cols * rows
    for idx, (img, target_or_imgid) in enumerate(ds):
        if idx >= capacity:
            # The grid is full: stop instead of consuming the rest of the
            # dataset (which never ends for a repeated dataset).
            break
        ix  = idx % cols
        iy  = idx // cols
        img = np.clip(img.numpy() * 255, 0, 255).astype(np.uint8)
        img = PIL.Image.fromarray(img)
        img = img.resize((thumb_size, thumb_size), resample=PIL.Image.BILINEAR)
        mosaic.paste(img, (ix*thumb_size + ix, 
                           iy*thumb_size + iy))

    display(mosaic)

# Preview the first 60 (un-augmented) training images.
ds = get_dataset(files_train, CFG).unbatch().take(12*5)   
show_dataset(64, 12, 5, ds)

## Test of image augmentation

In [None]:
# Take a single training record, repeat it, and run it through the augmenting
# pipeline 60 times so the mosaic shows 60 random variants of the same image.
ds = (
    tf.data.TFRecordDataset(files_train, num_parallel_reads=AUTO)
    .take(1)
    .cache()
    .repeat()
    .map(read_labeled_tfrecord, num_parallel_calls=AUTO)
    .map(
        lambda img, target: (prepare_image(img, cfg=CFG, augment=True), target),
        num_parallel_calls=AUTO,
    )
    .take(12*5)
    .prefetch(AUTO)
)

show_dataset(64, 12, 5, ds)

## Images from the test data

In [None]:
# Preview the first 60 test images (no augmentation, no labels).
ds = get_dataset(files_test, CFG, labeled=False)
ds = ds.unbatch().take(12*5)
show_dataset(64, 12, 5, ds)

In [None]:

def _build_efficientnet_classifier(backbone, dim, learning_rate=0.001, label_smoothing=0.05):
    """Build and compile a binary classifier on top of an EfficientNet backbone.

    The seven public builders below differ only in the backbone they pass in.

    Args:
        backbone: an ``efn.EfficientNetB*`` constructor.
        dim: side length of the square RGB input.
        learning_rate: initial Adam learning rate. Training in this notebook
            attaches a LearningRateScheduler callback that sets the rate per
            epoch.
        label_smoothing: label-smoothing factor of the binary cross-entropy.

    Returns:
        A compiled ``tf.keras.Model`` with one sigmoid output, tracking AUC.
    """
    inp = tf.keras.layers.Input(shape=(dim,dim,3))
    base = backbone(input_shape=(dim,dim,3),weights='imagenet',include_top=False)
    x = base(inp)
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    x = tf.keras.layers.Dense(1,activation='sigmoid')(x)
    model = tf.keras.Model(inputs=inp,outputs=x)
    opt = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    loss = tf.keras.losses.BinaryCrossentropy(label_smoothing=label_smoothing)
    model.compile(optimizer=opt,loss=loss,metrics=['AUC'])
    return model


def model_f0(dim):
    """Return a compiled EfficientNet-B0 classifier for ``dim`` x ``dim`` RGB input."""
    return _build_efficientnet_classifier(efn.EfficientNetB0, dim)


def model_f1(dim):
    """Return a compiled EfficientNet-B1 classifier for ``dim`` x ``dim`` RGB input."""
    return _build_efficientnet_classifier(efn.EfficientNetB1, dim)


def model_f2(dim):
    """Return a compiled EfficientNet-B2 classifier for ``dim`` x ``dim`` RGB input."""
    return _build_efficientnet_classifier(efn.EfficientNetB2, dim)


def model_f3(dim):
    """Return a compiled EfficientNet-B3 classifier for ``dim`` x ``dim`` RGB input."""
    return _build_efficientnet_classifier(efn.EfficientNetB3, dim)


def model_f4(dim):
    """Return a compiled EfficientNet-B4 classifier for ``dim`` x ``dim`` RGB input."""
    return _build_efficientnet_classifier(efn.EfficientNetB4, dim)


def model_f5(dim):
    """Return a compiled EfficientNet-B5 classifier for ``dim`` x ``dim`` RGB input."""
    return _build_efficientnet_classifier(efn.EfficientNetB5, dim)


def model_f6(dim):
    """Return a compiled EfficientNet-B6 classifier for ``dim`` x ``dim`` RGB input."""
    return _build_efficientnet_classifier(efn.EfficientNetB6, dim)

In [None]:
# Build all seven networks inside the distribution strategy scope.
# The input size comes from CFG['net_size'] - the size `prepare_image` resizes
# every image to - instead of a second hard-coded 256, so the models and the
# input pipeline cannot drift apart.
# NOTE: every builder name is rebound to the model it returns, so this cell
# needs the model-definition cell to be re-run before it can be run again.
with strategy.scope():
    net_dim = CFG['net_size']
    model_f0 = model_f0(dim=net_dim)
    model_f1 = model_f1(dim=net_dim)
    model_f2 = model_f2(dim=net_dim)
    model_f3 = model_f3(dim=net_dim)
    model_f4 = model_f4(dim=net_dim)
    model_f5 = model_f5(dim=net_dim)
    model_f6 = model_f6(dim=net_dim)

In [None]:
# Training stream: augmented, shuffled and repeated, so `steps_per_epoch` below
# defines where an epoch ends.
# Every model in this notebook has a single sigmoid output, so each batch
# carries one label tensor. The previous 7-fold label replication
# (tuple([label] * CFG['net_count'])) did not match a single-output model.
ds_train     = get_dataset(files_train, CFG, augment=True, shuffle=True, repeat=True)

# NOTE(review): validation data is also augmented and shuffled, which makes the
# validation metrics vary between runs. This presumably mirrors the augmented
# test-time prediction further down - confirm it is intended.
ds_val       = get_dataset(files_val, CFG, augment=True, shuffle=True, repeat=False)

# Whole number of steps per epoch; the last, partial batch counts as a step.
steps_train  = math.ceil(count_data_items(files_train) / (CFG['batch_size'] * REPLICAS))




In [None]:
def get_lr_callback(cfg):
    """Create the per-epoch learning-rate scheduler callback.

    The schedule has three phases: a linear ramp from ``LR_START`` to the peak
    over ``LR_RAMPUP_EPOCHS`` epochs, a plateau at the peak for
    ``LR_SUSTAIN_EPOCHS`` epochs, then an exponential decay towards ``LR_MIN``
    with factor ``LR_EXP_DECAY`` per epoch. The peak is ``LR_MAX`` multiplied
    by the number of replicas of the global ``strategy``.

    Args:
        cfg: configuration mapping holding the ``LR_*`` keys named above.

    Returns:
        A ``tf.keras.callbacks.LearningRateScheduler``.
    """
    start_lr    = cfg['LR_START']
    peak_lr     = cfg['LR_MAX'] * strategy.num_replicas_in_sync
    floor_lr    = cfg['LR_MIN']
    ramp_epochs = cfg['LR_RAMPUP_EPOCHS']
    hold_epochs = cfg['LR_SUSTAIN_EPOCHS']
    decay       = cfg['LR_EXP_DECAY']

    def lrfn(epoch):
        # Phase 1: linear warm-up.
        if epoch < ramp_epochs:
            return (peak_lr - start_lr) / ramp_epochs * epoch + start_lr
        # Phase 2: hold the peak rate.
        if epoch < ramp_epochs + hold_epochs:
            return peak_lr
        # Phase 3: exponential decay towards the floor.
        return (peak_lr - floor_lr) * decay**(epoch - ramp_epochs - hold_epochs) + floor_lr

    return tf.keras.callbacks.LearningRateScheduler(lrfn, verbose=False)

In [None]:
# Train the seven networks one after another with identical settings and save
# each one to 'model_f<k>.hdf5' as soon as its training finishes.
histories = []
for net_idx, net in enumerate([model_f0, model_f1, model_f2, model_f3,
                               model_f4, model_f5, model_f6]):
    print(f"=========== Model_f{net_idx} =============")
    net_history = net.fit(
        ds_train,
        verbose=1,
        steps_per_epoch=steps_train,
        epochs=CFG['epochs'],
        callbacks=[get_lr_callback(CFG)],  # fresh scheduler per model
        validation_data=ds_val,
    )
    histories.append(net_history)
    net.save(f'model_f{net_idx}.hdf5')

# Keep the individual history names available to later cells.
history0, history1, history2, history3, history4, history5, history6 = histories

### Predict the test set using augmented images

In [None]:
# Augmented, unshuffled test stream; the second tuple element is a dummy 0
# because image names are not requested here.
test_ds = get_dataset(files_test, CFG, augment=True, repeat=False, labeled=False, return_image_names=False)

print('Computing predictions...')
# Drop the dummy element so that predict() receives images only.
test_images_ds = test_ds.map(lambda image, idnum: image)

model_list = [model_f0,model_f1,model_f2,model_f3,model_f4,model_f5,model_f6]

# One prediction array per model, in `model_list` order.
ens_probabilities = []
for net in model_list:
    ens_probabilities.append(net.predict(test_images_ds))

print("========================  Done  ============================")

In [None]:
# Unweighted ensemble: element-wise mean of the per-model predictions.
summed_prob = np.sum(ens_probabilities, axis=0)
average_prob = summed_prob / len(model_list)

# A weighted ensemble was sketched here but left disabled by the author: give
# `weights` one entry per model and combine with
# np.tensordot(ens_probabilities, weights, axes=((0),(0))).


In [None]:
# Second, un-augmented pass over the test files to collect the image names.
ds = get_dataset(files_test, CFG, augment=False, repeat=False, 
                 labeled=False, return_image_names=True)

decoded_names = []
for _img, img_name in ds.unbatch():
    decoded_names.append(img_name.numpy().decode("utf-8"))
image_names = np.array(decoded_names)

In [None]:
def _submission_frame(probabilities):
    """Pair the test image names with one flattened column of predictions."""
    return pd.DataFrame({'image_name': image_names, 'target': np.concatenate(probabilities)})


# Ensemble average first, then one frame per individual model.
pred_average = _submission_frame(average_prob)

ef0, ef1, ef2, ef3, ef4, ef5, ef6 = (
    _submission_frame(ens_probabilities[model_idx]) for model_idx in range(7)
)

In [None]:
# Write the ensemble submission and the seven single-model submissions.
for csv_name, frame in (
    ('pred_average.csv', pred_average),
    ('ef0.csv', ef0),
    ('ef1.csv', ef1),
    ('ef2.csv', ef2),
    ('ef3.csv', ef3),
    ('ef4.csv', ef4),
    ('ef5.csv', ef5),
    ('ef6.csv', ef6),
):
    frame.to_csv(csv_name, index=False)

In [None]:
# Reload the seven saved networks from disk.
# The loader comes from tf.keras, the same Keras implementation the models were
# built and saved with, rather than from the standalone `keras` package.
from tensorflow.keras.models import load_model

# m<k> is loaded from model_f<k>.hdf5. Generating the paths from the index
# removes the copy-paste slip that made m2 load model_f3.hdf5 a second time.
m0, m1, m2, m3, m4, m5, m6 = (
    load_model(f'./model_f{model_idx}.hdf5') for model_idx in range(7)
)


In [None]:
# Ensemble used for the timing comparison below. m0 was loaded but missing from
# this list, so the "ensemble" timings covered only six of the seven networks.
models = [m0,m1,m2,m3,m4,m5,m6]

# `test_ds` is already batched, so take(n) selects n *batches*; the "N data"
# wording in the timing printouts therefore refers to batches, not images.
test1 = test_ds.take(1)
test10 = test_ds.take(10)
test50 = test_ds.take(50)
test100 = test_ds.take(100)

In [None]:
# Wall-clock comparison on `test10`: one model (m6) versus every model in
# `models` followed by averaging. `time` is imported in the notebook's import cell.
st = time.time()
pr = m6.predict(test10)
ed = time.time()
single_elapsed_10 = ed - st
print("Single model time for 10 data: ", single_elapsed_10)

start10 = time.time()
ens = []
for member in models:
    ens.append(member.predict(test10))
av = np.sum(ens, axis=0)/len(models)
end10 = time.time()
ensemble_elapsed_10 = end10 - start10
print("ensemble time for 10 data: ", ensemble_elapsed_10)

In [None]:
# Wall-clock comparison on `test1`: one model (m6) versus every model in
# `models` followed by averaging.
st = time.time()
pr = m6.predict(test1)
ed = time.time()
single_elapsed_1 = ed - st
print("Single model time for 1 data: ", single_elapsed_1)

start1 = time.time()
ens = []
for member in models:
    ens.append(member.predict(test1))
av = np.sum(ens, axis=0)/len(models)
end1 = time.time()
ensemble_elapsed_1 = end1 - start1
print("ensemble time for 1 data: ", ensemble_elapsed_1)

In [None]:
# Wall-clock comparison on `test50`: one model (m6) versus every model in
# `models` followed by averaging.
st = time.time()
pr = m6.predict(test50)
ed = time.time()
single_elapsed_50 = ed - st
print("Single model time for 50 data: ", single_elapsed_50)

start50 = time.time()
ens = []
for member in models:
    ens.append(member.predict(test50))
av = np.sum(ens, axis=0)/len(models)
end50 = time.time()
ensemble_elapsed_50 = end50 - start50
print("ensemble time for 50 data: ", ensemble_elapsed_50)