In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import tensorflow as tf 

!pip install -U efficientnet

import efficientnet.tfkeras as eff
from kaggle_datasets import KaggleDatasets

In [None]:
# Detect and initialise the TPU.
# The resolver is created with no arguments, so it has to discover the TPU on
# its own. NOTE(review): presumably this raises when the session has no TPU
# attached - confirm the accelerator setting before running.
tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
# Connect this runtime to the TPU cluster, then initialise the TPU system.
tf.config.experimental_connect_to_cluster(tpu)
tf.tpu.experimental.initialize_tpu_system(tpu)

# Instantiate the distribution strategy. It is used further down to size the
# global batch (num_replicas_in_sync) and to scope model construction.
tpu_strategy = tf.distribute.experimental.TPUStrategy(tpu)


In [None]:
# Shorthand passed as num_parallel_reads / num_parallel_calls / prefetch size
# in the dataset builders below.
AUTO = tf.data.experimental.AUTOTUNE
# Dataset options with deterministic ordering switched off; attached to the
# pipeline in make_train_dataset via `with_options`.
ignore_order = tf.data.Options()
ignore_order.experimental_deterministic = False

In [None]:
# GCS location of the 'melanoma-512x512' Kaggle dataset; the TFRecord globs
# below are built on top of this prefix.
gcs_path = KaggleDatasets().get_gcs_path('melanoma-512x512')

In [None]:
# All training TFRecord shards. The glob result is sorted because its order
# is not documented, and the position of a shard in this list decides whether
# it ends up in the training or the validation split.
filepaths = sorted(tf.io.gfile.glob(gcs_path + '/train*.tfrec'))

In [None]:
# Hold out the last shard for validation and train on all the others.
# filepaths_train is a list of paths; filepaths_valid is a single path string.
filepaths_train, filepaths_valid = filepaths[:-1], filepaths[-1]

In [None]:
# Global batch size: 8 examples per replica.
BATCH_SIZE = 8 * tpu_strategy.num_replicas_in_sync

# Example counts hard-coded by the notebook author.
# NOTE(review): presumably the number of records in the training shards and
# in the held-out validation shard - confirm against the TFRecord files.
N_TRAIN_EXAMPLES = 30952
N_VALID_EXAMPLES = 2174

# Steps per pass = ceil(examples / batch size). `-(-a // b)` is integer
# ceiling division; the earlier `int(a / b) + 1` ran one step too many
# whenever the count was an exact multiple of the batch size.
TRAIN_STEPS = -(-N_TRAIN_EXAMPLES // BATCH_SIZE)
VALID_STEPS = -(-N_VALID_EXAMPLES // BATCH_SIZE)

# image augmentations

In [None]:
def make_augmentations(image):
    """Apply the random training-time augmentations to one image.

    The steps run in a fixed order: horizontal flip, saturation jitter
    (factor in [0.7, 1.3]), contrast jitter (factor in [0.8, 1.2]) and a
    brightness shift of at most 0.1.

    Args:
        image: Image tensor to augment.

    Returns:
        The augmented image tensor.
    """
    # NOTE(review): the training pipeline in this file passes float32 images
    # cast straight from decoded bytes (i.e. not rescaled), so a brightness
    # delta of 0.1 is a very small shift - confirm that is intended.
    flipped = tf.image.random_flip_left_right(image)
    saturated = tf.image.random_saturation(flipped, lower=0.7, upper=1.3)
    contrasted = tf.image.random_contrast(saturated, lower=0.8, upper=1.2)
    return tf.image.random_brightness(contrasted, max_delta=0.1)

In [None]:
def read_tfrec_train(example):
    """Parse one serialized training record.

    Args:
        example: A serialized record with the scalar features
            'image_name' (string), 'image' (string) and 'target' (int64).

    Returns:
        A tuple ``(image, target)``: the still-encoded image string and the
        int64 label. 'image_name' is parsed but not returned.
    """
    schema = {
        'image_name': tf.io.FixedLenFeature([], tf.string),
        'image': tf.io.FixedLenFeature([], tf.string),
        'target': tf.io.FixedLenFeature([], tf.int64),
    }
    parsed = tf.io.parse_single_example(example, schema)
    return parsed['image'], parsed['target']

In [None]:
def make_train_dataset(filepaths,batch_size,train=True):
    """Build the repeated, batched input pipeline for training or validation.

    Args:
        filepaths: TFRecord path, or list of paths, to read.
        batch_size: Number of examples per batch.
        train: When True the records are read in non-deterministic order,
            shuffled and augmented. When False they are only decoded, with
            no shuffling or augmentation, so the element order is
            reproducible.

    Returns:
        An endlessly repeating ``tf.data.Dataset`` yielding
        ``(image, target)`` batches, with images as float32 tensors reshaped
        to 512x512x3. Because the dataset repeats forever, callers must
        bound it with ``steps_per_epoch`` / ``validation_steps``.
    """
    def _decode(img_bytes, label):
        # Decode, cast and pin the static shape in one step. The reshape
        # runs before augmentation so the image ops see a fully defined
        # 512x512x3 tensor.
        img = tf.io.decode_image(img_bytes, channels=3)
        img = tf.cast(img, dtype=tf.float32)
        return tf.reshape(img, [512, 512, 3]), label

    ds = tf.data.TFRecordDataset(filepaths, num_parallel_reads=AUTO)
    if train:
        # Order does not matter for training, so allow non-deterministic reads.
        ds = ds.with_options(ignore_order)
    ds = ds.map(read_tfrec_train, num_parallel_calls=AUTO)
    ds = ds.repeat()
    if train:
        # Shuffle while the images are still encoded strings (cheap buffer).
        # Validation is deliberately NOT shuffled: a shuffle buffer on the
        # repeated stream makes every fixed-length validation pass see a
        # different subset (some records twice, some never), which adds
        # noise to the validation metric.
        ds = ds.shuffle(1024)
    ds = ds.map(_decode, num_parallel_calls=AUTO)
    if train:
        ds = ds.map(lambda img, label: (make_augmentations(img), label), num_parallel_calls=AUTO)
    ds = ds.batch(batch_size)
    ds = ds.prefetch(AUTO)
    return ds
# Build the two pipelines: augmented training data from the training shards,
# and un-augmented data from the held-out validation shard.
train_ds = make_train_dataset(filepaths_train, batch_size=BATCH_SIZE, train=True)
valid_ds = make_train_dataset(filepaths_valid, batch_size=BATCH_SIZE, train=False)

In [None]:
# Display both dataset objects as the cell output.
train_ds, valid_ds

# Ensemble of EfficientNets trained on images only (no metadata)

In [None]:
class early_stopping(tf.keras.callbacks.Callback):
    """Stop training once the monitored AUC stops improving, then restore the best weights.

    After every epoch the monitored value is compared with the best value
    seen so far. An equal or higher value stores a copy of the model weights
    and resets the counter; a lower value increments it. When the counter
    reaches ``patience`` training is stopped and the stored weights are put
    back on the model.

    Note: the weights are only restored when training is stopped by this
    callback, not when ``fit`` simply runs out of epochs.

    Args:
        patience: Number of consecutive non-improving epochs tolerated
            before stopping.
        monitor: Key in the epoch ``logs`` to track. Defaults to
            ``'val_auc'``.
    """
    def __init__(self,patience=1,monitor='val_auc'):
        # Let the Keras base class set up its own attributes first.
        super().__init__()
        self.patience=patience
        self.monitor=monitor
        self._reset_state()

    def _reset_state(self):
        # Best value so far; -1.0 is below any possible AUC, so the first
        # epoch always counts as an improvement.
        self.auc_op=-1.0
        self.count=0
        self.weights_op=None
        self.best_epoch=None

    def on_train_begin(self,logs=None):
        # Start from a clean slate if the same instance is reused for another fit.
        self._reset_state()

    def on_epoch_end(self,epoch,logs=None):
        logs=logs or {}
        if self.monitor not in logs:
            # Same exception type as a bare dict lookup, but say what is
            # actually available (e.g. Keras may have named it 'val_auc_1').
            raise KeyError("early_stopping expected '{}' in the epoch logs; available keys: {}".format(self.monitor,sorted(logs)))
        current=logs[self.monitor]

        if current>=self.auc_op:
            self.weights_op=self.model.get_weights()
            self.auc_op=current
            self.best_epoch=epoch
            self.count=0
            print('\nVALIDATION AUC INCREASED')
            return

        self.count=self.count+1
        print('\nVALIDATION AUC DID NOT INCREASE ES COUNT :{}/{}'.format(self.count,self.patience))
        # `>=` rather than `==` so that patience=0 still stops training.
        if self.count>=self.patience:
            print('EARLY STOPPING ACTIVATED')
            self.model.stop_training=True
            # Nothing to restore if no epoch ever counted as an improvement
            # (e.g. the monitored value was NaN from the start).
            if self.weights_op is not None:
                # `epoch` is 0-based; report the best epoch 1-based.
                print('RESTORING WEIGHTS OF EPOCH {} . BEST VALID AUC = {}'.format(self.best_epoch+1,self.auc_op))
                self.model.set_weights(self.weights_op)
                
def schedule(epoch):
    """Exponentially decaying learning rate: ``1e-4 * exp(-0.5 * epoch)``.

    The signature is deliberately kept to a single positional argument,
    because the Keras learning-rate callback also supports two-argument
    ``schedule(epoch, lr)`` functions.

    Args:
        epoch: Epoch index, starting at 0.

    Returns:
        The learning rate for that epoch as a Python float.
    """
    base_lr = 0.0001
    decay_per_epoch = 0.5
    # np.exp instead of a hand-truncated constant for e.
    return float(base_lr * np.exp(-decay_per_epoch * epoch))

# Keras callback that drives the learning rate from `schedule`.
lr_scheduler = tf.keras.callbacks.LearningRateScheduler(schedule)

In [None]:
# Class weights that counter the label imbalance, computed from the labels
# in train.csv.
train=pd.read_csv('../input/melanoma-512x512/train.csv')
from sklearn.utils.class_weight import compute_class_weight
classes=np.unique(train['target'])
# Keyword arguments: current scikit-learn releases reject the positional
# form of `classes` and `y`.
cw=compute_class_weight(class_weight='balanced',classes=classes,y=train['target'])
# Map each actual class label to its weight (plain int/float keys and values)
# instead of assuming the labels are exactly 0 and 1.
cw_dict={int(label):float(weight) for label,weight in zip(classes,cw)}
cw_dict

In [None]:
# Build and compile the ensemble inside the TPU strategy scope.
with tpu_strategy.scope():
    input_layer=tf.keras.layers.Input(shape=(512,512,3))
    # Identity layer: one shared tensor that feeds all three backbones.
    common_input=tf.keras.layers.Lambda(lambda x:x)(input_layer)

    # Three ImageNet-pretrained backbones without their classification heads.
    # NOTE: the b0/b1/b2 suffixes are only branch indices - the networks are
    # EfficientNet B1, B2 and B3 respectively.
    model_b0=eff.EfficientNetB1(weights='imagenet',input_shape=(512,512,3),include_top=False)(common_input)
    model_b1=eff.EfficientNetB2(weights='imagenet',input_shape=(512,512,3),include_top=False)(common_input)
    model_b2=eff.EfficientNetB3(weights='imagenet',input_shape=(512,512,3),include_top=False)(common_input)

    # Per-branch head: global average pooling followed by a single linear unit.
    gapb0=tf.keras.layers.GlobalAveragePooling2D()(model_b0)
    predsb0=tf.keras.layers.Dense(1)(gapb0)

    gapb1=tf.keras.layers.GlobalAveragePooling2D()(model_b1)
    predsb1=tf.keras.layers.Dense(1)(gapb1)

    gapb2=tf.keras.layers.GlobalAveragePooling2D()(model_b2)
    predsb2=tf.keras.layers.Dense(1)(gapb2)

    # Combine the three branch outputs with one more Dense unit + sigmoid.
    output_concat=tf.keras.layers.Concatenate()([predsb0,predsb1,predsb2])
    real_out=tf.keras.layers.Dense(1,activation='sigmoid')(output_concat)
    ensem_model=tf.keras.models.Model(inputs=[input_layer],outputs=[real_out])

    # Pin the metric name. Unnamed Keras metrics are auto-numbered
    # ('auc', 'auc_1', ...) when this cell is re-run in the same kernel,
    # which would break the 'val_auc' lookup in the early-stopping callback.
    roc_score=tf.keras.metrics.AUC(curve='ROC',name='auc')

    ensem_model.compile(optimizer='adam',loss='binary_crossentropy',metrics=[roc_score])


In [None]:
# Train with class weighting, the custom early stopping (patience 6) and the
# learning-rate schedule. Both datasets repeat forever, so the step counts
# bound each training and validation pass.
history = ensem_model.fit(
    train_ds,
    epochs=100,
    steps_per_epoch=TRAIN_STEPS,
    class_weight=cw_dict,
    validation_data=valid_ds,
    validation_steps=VALID_STEPS,
    callbacks=[early_stopping(patience=6), lr_scheduler],
)

# Predictions on the test set

In [None]:
# All test TFRecord shards under the dataset's GCS prefix.
filepaths_test = tf.io.gfile.glob(gcs_path + '/test*.tfrec')

In [None]:
def read_tfrec_test(example,read_filenames):
    """Parse one serialized test record.

    Args:
        example: A serialized record with the scalar string features
            'image_name' and 'image'.
        read_filenames: When truthy, return the image name alongside the
            image.

    Returns:
        ``(image, image_name)`` when ``read_filenames`` is truthy, otherwise
        just ``image``. The image is the still-encoded string.
    """
    schema = {
        'image_name': tf.io.FixedLenFeature([], tf.string),
        'image': tf.io.FixedLenFeature([], tf.string),
    }
    parsed = tf.io.parse_single_example(example, schema)
    if not read_filenames:
        return parsed['image']
    return parsed['image'], parsed['image_name']

In [None]:
def make_test_dataset(filepaths,batch_size,read_filenames=False):
    """Build the batched test pipeline (single pass, no shuffling).

    Args:
        filepaths: TFRecord path, or list of paths, to read.
        batch_size: Number of examples per batch.
        read_filenames: When True each element is ``(image, image_name)``;
            otherwise each element is just the image.

    Returns:
        A ``tf.data.Dataset`` of batches whose images are float32 tensors
        reshaped to 512x512x3.
    """
    def _to_image(img_bytes):
        # Decode -> float32 -> fixed 512x512x3 shape, shared by both branches.
        decoded = tf.io.decode_image(img_bytes, channels=3)
        as_float = tf.cast(decoded, dtype=tf.float32)
        return tf.reshape(as_float, [512, 512, 3])

    records = tf.data.TFRecordDataset(filepaths, num_parallel_reads=AUTO)
    parsed = records.map(
        lambda example: read_tfrec_test(example, read_filenames),
        num_parallel_calls=AUTO,
    )
    if read_filenames:
        images = parsed.map(lambda img, label: (_to_image(img), label), num_parallel_calls=AUTO)
    else:
        images = parsed.map(_to_image, num_parallel_calls=AUTO)
    return images.batch(batch_size).prefetch(AUTO)

In [None]:
# Test pipeline that also carries the image names, used to recover the ids.
test_ids_ds = make_test_dataset(filepaths_test, batch_size=BATCH_SIZE, read_filenames=True)

In [None]:
# Collect the image names in dataset order. Iterating batch by batch avoids
# unbatching, which re-split every image batch into single examples just to
# read the name next to it.
test_imagenames=[]
for _, name_batch in test_ids_ds:
    test_imagenames.extend(name.decode('utf-8') for name in name_batch.numpy())

In [None]:
# Image-only test pipeline over the same files, used for prediction.
test_ds = make_test_dataset(filepaths_test, batch_size=BATCH_SIZE, read_filenames=False)

In [None]:
# Predicted probabilities for every test image, one row per image.
preds = ensem_model.predict(test_ds)

In [None]:
# Pair each image name with its flattened prediction. This relies on the two
# test pipelines above yielding the records in the same order.
submit = pd.DataFrame({'image_name': test_imagenames, 'target': preds.ravel()})

In [None]:
# Order the submission rows by image name.
submit = submit.sort_values(by='image_name')

In [None]:
# Write the submission file without the DataFrame index column.
submit.to_csv('final_submit_5.csv', index=False)

In [None]:
# Completion marker for the run.
print("DONE , HURRAY")