In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Introduction
---
---
When creating an image classifier, the very first approach we should try is to look for a pre-trained model instead of building a model from scratch. In this notebook we will look at how to use a pre-trained model and fine-tune it for our particular task, in this case melanoma classification. The model we are going to use is TensorFlow's InceptionV3.

In [None]:
import tensorflow as tf
from kaggle_datasets import KaggleDatasets
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow_addons as tfa
from tensorflow.keras.layers import Dense , Dropout , BatchNormalization , concatenate
from tensorflow.keras.layers import Activation , Input , GlobalAveragePooling2D  
from tensorflow.keras.models import Model , Sequential
from tensorflow.keras.applications import InceptionV3 , MobileNetV2
from sklearn.model_selection import KFold
import re
from PIL import Image

## Utility Functions
---
Now let's first create some utility functions for data preparation, image preprocessing and augmentation.

In [None]:
IMG_DIMS = 64 # images are resized to IMG_DIMS x IMG_DIMS; NOTE(review): the earlier remark that InceptionV3 needs at least 75x75 does not fit this value - the models trained below are built on MobileNetV2
CHANNELS = 3 # colour channels; decode_image always decodes JPEGs with channels=3
BATCH_SIZE = 32 # per-replica batch size; _get_ds multiplies it by REPLICAS
SEED = 42 # NOTE(review): not referenced by any cell visible in this notebook
SPLITS = 5 # number of folds passed to KFold

AUTO  = tf.data.experimental.AUTOTUNE # lets tf.data choose parallelism / prefetch depth

In [None]:
# Training data: resolve the GCS bucket of the 'melanoma-256x256' dataset and list its train TFRecords.
GCS_PATH = KaggleDatasets().get_gcs_path('melanoma-256x256')
train_datasets = tf.io.gfile.glob(GCS_PATH + '/train*.tfrec')

print('number of TFRecords in train : ',len(train_datasets))

# Test data: GCS_PATH is deliberately re-bound here to the competition dataset,
# so the glob below must stay after this assignment.
GCS_PATH = KaggleDatasets().get_gcs_path('siim-isic-melanoma-classification')
test_datasets = tf.io.gfile.glob(GCS_PATH + '/tfrecords/test*.tfrec')

print('number of TFRecords in test : ' ,len(test_datasets))


In [None]:
# parse data from TF Records
def parse_TFR_data_labelled(sample):
    """Parse one serialized labelled TFRecord example.

    Every field of the record schema is declared so the example parses,
    but only two are handed back to the caller.

    Args:
        sample: a single serialized example read from a train TFRecord file.

    Returns:
        Tuple ``(image, target)``: the still-encoded image string and the
        integer label.
    """
    def _text(default=''):
        # scalar string feature
        return tf.io.FixedLenFeature([] , tf.string , default_value=default)

    def _integer(default=0):
        # scalar int64 feature
        return tf.io.FixedLenFeature([] , tf.int64 , default_value=default)

    schema = {
        'image': _text(),
        'image_name': _text(),
        'patient_id': _integer(),
        'sex': _integer(),
        'age_approx': _integer(),
        'anatom_site_general_challenge': _integer(),
        'diagnosis': _integer(),
        'target': _integer(),
        'width': _integer(),
        'height': _integer(),
    }

    parsed = tf.io.parse_single_example(sample , schema)
    return parsed['image'] , parsed['target']

In [None]:
# decode img
def decode_image(img , IMG_DIMS):
    """Decode JPEG bytes into a 3-channel image and resize it to a square.

    Args:
        img: encoded JPEG string, as stored in the TFRecords.
        IMG_DIMS: side length of the square output, in pixels.

    Returns:
        The decoded image resized to ``IMG_DIMS x IMG_DIMS``.
    """
    square = [IMG_DIMS , IMG_DIMS]
    decoded = tf.image.decode_jpeg(img, channels=3)
    return tf.image.resize(decoded , square)

In [None]:
# load data set for training and validation
def _get_ds(files , train=True , repeat=True , img_dims=64 , batch_size=32):
    ds = tf.data.TFRecordDataset(files , num_parallel_reads=AUTO)
    ds = ds.cache()
    
    if repeat:
        ds = ds.repeat()
    ds = ds.map(parse_TFR_data_labelled , num_parallel_calls=AUTO)     
    ds = ds.map(lambda img ,label: (decode_image(img, img_dims),label) , num_parallel_calls=AUTO)
    if train:
        ds = ds.shuffle(buffer_size=1000)
       
    ds = ds.batch(batch_size*REPLICAS)
    ds = ds.prefetch(AUTO)
    return ds

In [None]:
def count_data_items(filenames):
    """Sum the example counts embedded in TFRecord file names.

    Each name must contain a ``-<digits>.`` fragment (for example
    ``train00-2071.tfrec``); the digits are read as that file's count.

    Args:
        filenames: iterable of TFRecord file names / paths.

    Returns:
        The total of all per-file counts, as computed by ``np.sum``.
    """
    count_pattern = re.compile(r"-([0-9]*)\.")
    per_file = []
    for name in filenames:
        per_file.append(int(count_pattern.search(name).group(1)))
    return np.sum(per_file)

In [None]:
# preparing the testing data
def parsed_TFR_unlabelled_2(sample):
    """Parse one serialized test example into ``(image_name, image)``.

    Args:
        sample: a single serialized example read from a test TFRecord file.

    Returns:
        Tuple ``(name, img)``: the image name string and the still-encoded
        image string. ``target`` is declared (default 0) but not returned.
    """
    schema = {
        'image': tf.io.FixedLenFeature([], tf.string, default_value=''),
        'image_name': tf.io.FixedLenFeature([], tf.string, default_value=''),
        'target': tf.io.FixedLenFeature([], tf.int64, default_value=0),
    }
    parsed = tf.io.parse_single_example(sample , schema)
    return parsed['image_name'] , parsed['image']

# Test pipeline: parse every test record and decode its image at IMG_DIMS,
# the same resolution the models are built with (previously a hard-coded 64).
test_data = tf.data.TFRecordDataset(test_datasets)
test_data = test_data.map(parsed_TFR_unlabelled_2 , num_parallel_calls=AUTO)
test_data = test_data.map(lambda name , img: (name , decode_image(img , IMG_DIMS)) ,
                          num_parallel_calls=AUTO)

sub_df = pd.read_csv('../input/siim-isic-melanoma-classification/sample_submission.csv')

# Materialise the decoded test images in memory, keyed by image name.
x_dict = {name.numpy().decode(): img.numpy() for name , img in test_data}

print(f'number of samples in testing data : {len(x_dict)}')

# Re-order the images to follow the submission file. pop() drops each entry
# from the dict as it is consumed, so the images are not held twice.
test = np.array([x_dict.pop(image_name) for image_name in sub_df['image_name']])
print(f'sahpe of testing set sorted according to the submission file : {test.shape}')

Now everything is ready, so let's get started.
There are two ways we can use a pre-trained model:
1. As a feature extractor - in this approach we don't train the pre-trained model but use it as a feature extractor; that is, we only use the trained model to process each image and produce a feature vector, and then we train a small inference model on these feature vectors. This approach is suitable when your dataset is very simple and very similar to what the model was trained on.

2. Fine-tuning - in this approach we retrain the whole model on our dataset. Sometimes fine-tuning also requires choosing which layers you want to train and which you don't: most of the time the initial layers produce the same result for almost every dataset, so you don't need to train those layers; the later layers (those nearest the output, often called the top layers) are where things change, so you need to train those layers.

# Using Pre Trained Model as a Feature extractor
---
---

There are two ways of using a feature extractor:
1. Use it as a part of the model - in this approach the features are regenerated in each epoch, which is computationally expensive if you are working on a low-end device.
2. Extract the features once - in this approach we extract the features of all the images, store them in a dataset, and use that dataset to train the model. This approach is preferred when you are on a low-end device, but it takes more effort than the first one because you have to create a whole new dataset and make sure that each label is accurately mapped to the correct feature vector.

Here we are going for the first approach, as we have the resources to bear those computations, thanks to Kaggle for providing us with free TPUs and GPUs.

First we need to create our base model and set all its layers to non-trainable.

InceptionV3 requires each pixel in the image to be in the range -1 to 1, so we use a `preprocess_input` layer to perform this transformation (the code below builds on MobileNetV2 and uses `mobilenet_v2.preprocess_input`, which scales pixels to the same range). <br>
Then we create a sequential model for image augmentation, which augments the image and feeds it to the base model; the base model then extracts the features and feeds them to the inference model.

In [None]:
# augumentaion model
# Augmentation pipeline, assembled one layer at a time:
# horizontal flip -> random rotation (factor 0.2) -> random contrast (factor 0.3).
# The model-building code in this notebook has its `aug(...)` call commented out.
aug = Sequential()
aug.add(tf.keras.layers.experimental.preprocessing.RandomFlip('horizontal'))
aug.add(tf.keras.layers.experimental.preprocessing.RandomRotation(0.2))
aug.add(tf.keras.layers.experimental.preprocessing.RandomContrast(0.3))


In [None]:
def create_Inf_model(IMG_DIMS , CHANNELS):
    """Build the feature-extractor classifier: frozen MobileNetV2 + small trainable head.

    Args:
        IMG_DIMS: side length of the square input images.
        CHANNELS: number of input channels. Callers pass 3.
            NOTE(review): the ImageNet weights are expected to need 3-channel
            input - confirm before passing another value.

    Returns:
        A compiled Keras model with a single sigmoid output, trained with
        binary cross-entropy and reporting ``accuracy`` and ``auc``.
    """
    in_put = Input(shape=(IMG_DIMS , IMG_DIMS , CHANNELS))
    # Augmentation is currently disabled; to enable it, route `in_put` through `aug` first.
    #pre = aug(in_put)
    # scale raw pixels the way MobileNetV2 expects
    pre = tf.keras.applications.mobilenet_v2.preprocess_input(in_put)
    # Pre-trained base used purely as a feature extractor.
    base_model = MobileNetV2(input_shape=(IMG_DIMS , IMG_DIMS , CHANNELS) , include_top=False , weights='imagenet')
    # Freeze the weights: `training=False` below only selects inference
    # behaviour for the call, it does not stop the optimizer from updating them.
    base_model.trainable = False
    x = base_model(pre , training = False)
    # top trainable model layers
    x = GlobalAveragePooling2D()(x)
    x = Dense(128 , activation = 'relu')(x)
    x = Dropout(0.3)(x)
    x = Dense(1 , activation = 'sigmoid')(x)
    model = Model(inputs=in_put , outputs=x)
    # optimizer
    opt = tf.keras.optimizers.Adam(0.0001)
    # Name the AUC metric explicitly: auto-generated names can get a numeric
    # suffix (auc_1, auc_2, ...) on later instances, which would no longer
    # match the 'val_auc' monitored by the callbacks.
    model.compile(optimizer=opt , loss='binary_crossentropy' ,
                  metrics=['accuracy' , tf.keras.metrics.AUC(name='auc')])
    return model

Now our model is ready, but since we don't have a separate validation set we will use the K-fold cross-validation technique to validate our model.

First let's allocate the TPUs.

In [None]:
# TPU bring-up; the order matters: resolve the cluster, connect to it,
# initialise the TPU system, and only then build the distribution strategy.
TPU = tf.distribute.cluster_resolver.TPUClusterResolver()
tf.config.experimental_connect_to_cluster(TPU)
tf.tpu.experimental.initialize_tpu_system(TPU)
strategy = tf.distribute.experimental.TPUStrategy(TPU)

# Number of replicas kept in sync; _get_ds and lr_schedule scale by this value.
REPLICAS = strategy.num_replicas_in_sync

In [None]:

# K-fold cross-validation over the TFRecord *files*: each fold trains a fresh
# feature-extractor model on the training files and validates on the held-out ones.
kf = KFold(n_splits=SPLITS)
oof_hist = []  # one Keras History per fold
oof_val = []   # the validation dataset used by each fold
for f , (idxT , idxV) in enumerate(kf.split(train_datasets)):
    train = [train_datasets[idx] for idx in idxT]
    val = [train_datasets[idx] for idx in idxV]

    # Drop the previous fold's Keras state before building a new model. This
    # also resets auto-generated layer/metric names between folds.
    tf.keras.backend.clear_session()

    # Build the datasets explicitly at the configured size/batch so they stay
    # consistent with IMG_DIMS (model input) and BATCH_SIZE (steps_per_epoch).
    train_ds = _get_ds(train , img_dims=IMG_DIMS , batch_size=BATCH_SIZE)
    # finite (repeat=False) so validation and any later use terminate
    val_ds = _get_ds(val , train=False , repeat=False , img_dims=IMG_DIMS , batch_size=BATCH_SIZE)

    # instantiate model
    with strategy.scope():
        # keep only the best checkpoint (by validation AUC) for this fold
        cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath='inf_model_fold_'+str(f)+'.hdf5' ,
                                                         monitor='val_auc',
                                                         mode='max',
                                                         save_best_only =True,
                                                         verbose = 1 )
        # early stopping
        es_callback = tf.keras.callbacks.EarlyStopping(monitor='val_auc' , patience=5 , mode='max' )
        model = create_Inf_model(IMG_DIMS , CHANNELS)

        history = model.fit(train_ds ,
                        epochs = 20 ,
                        # whole number of global batches per epoch
                        steps_per_epoch = int(count_data_items(train) // (BATCH_SIZE * REPLICAS)),
                        validation_data=val_ds,
                        callbacks = [cp_callback , es_callback],
                        verbose = 0)
    oof_hist.append(history)
    oof_val.append(val_ds)
    

In [None]:
# Training vs validation loss, one subplot per fold.
fig = plt.figure(figsize=(10,10))
# three columns; at least two rows, more if there are more than six folds
n_rows = max(2 , -(-len(oof_hist) // 3))
for fold_no , fold_hist in enumerate(oof_hist , start=1):
    ax = fig.add_subplot(n_rows , 3 , fold_no)
    ax.plot(fold_hist.history['loss'] , label='loss')
    ax.plot(fold_hist.history['val_loss'] , label='val_loss')
    ax.set_title('fold '+str(fold_no)+ ' LOSS')
    ax.set_xlabel('epoch')
    # legend on every subplot, not only the last one drawn
    ax.legend()

fig.tight_layout(pad=3)
plt.show()


In [None]:
# Training vs validation AUC, one subplot per fold.
fig = plt.figure(figsize=(10,10))
# three columns; at least two rows, more if there are more than six folds
n_rows = max(2 , -(-len(oof_hist) // 3))
for fold_no , fold_hist in enumerate(oof_hist , start=1):
    # The metric may have been recorded with a numeric suffix (auc_1, ...);
    # pick whichever training key starts with 'auc', falling back to 'auc'.
    auc_key = next((k for k in fold_hist.history if k.startswith('auc')) , 'auc')
    ax = fig.add_subplot(n_rows , 3 , fold_no)
    ax.plot(fold_hist.history[auc_key] , label='auc')
    ax.plot(fold_hist.history['val_' + auc_key] , label='val_auc')
    ax.set_title('fold '+str(fold_no) + ' AUC')
    ax.set_xlabel('epoch')
    # legend on every subplot, not only the last one drawn
    ax.legend()

fig.tight_layout(pad=3)
plt.show()

In [None]:
# Rebuild the feature-extractor architecture so saved weights can be loaded into it.
best_model = create_Inf_model(IMG_DIMS , CHANNELS)

In [None]:
# Only the fold-0 checkpoint is loaded here; the other folds' checkpoints are not used.
best_model.load_weights('inf_model_fold_0.hdf5')

In [None]:
# `test` was built in the order of the submission file, so predictions line up with sub_df rows.
preds = best_model.predict(test)

In [None]:
# Attach the predictions and index the frame by image name.
sub_df['target'] = preds
# Guard the re-index so this cell can be re-run: once 'image_name' is the
# index it is no longer a column, and a second set_index would raise KeyError.
if sub_df.index.name != 'image_name':
    sub_df = sub_df.set_index('image_name')
sub_df.head()

In [None]:
# 'image_name' is the index at this point, so it is written out as the first CSV column.
sub_df.to_csv('submission.csv')

As you can see from the results, our dataset is not that simple, so we need to fine-tune our model.

# Fine-tuning a pre-trained model
In this case we will train our pre-trained model except for its bottom layers, so first let's look at how many layers there are in our model.


In [None]:
# Instantiate InceptionV3 (ImageNet weights, no classification head) only to inspect its layers.
# NOTE(review): the fine-tuning model defined later is built on MobileNetV2, not on this
# network, so the counts printed here describe a different architecture - confirm intent.
base = InceptionV3(input_shape = (150, 150 , 3) , weights ='imagenet' , include_top=False)
print(f'# layers in base model = {len(base.layers)}')

**IMPORTANT** There is one important thing you need to know when unfreezing your model: if your pre-trained model contains **BatchNormalization layers**, you must keep them frozen. BatchNormalization contains two non-trainable weights which get updated during training, so if you unfreeze these layers their non-trainable weights will change and destroy the knowledge of the pre-trained model. You must therefore check whether your model contains these layers and keep them frozen while unfreezing the rest of the model.

So now let's look at how many BatchNormalization layers our model contains.

In [None]:
# Count the BatchNormalization layers of the inspected base model.
# The public tf.keras class with isinstance() replaces the private
# tf.python...normalization_v2 path, which is not a stable import location.
count = sum(isinstance(layer , tf.keras.layers.BatchNormalization) for layer in base.layers)

print(f'no of BatchNormalization Layers in our base model : {count}')

Now let's create a simple function which will keep these layers frozen.

In [None]:
def freeze_bn_layers(model):
    """Mark every BatchNormalization layer directly in ``model.layers`` as non-trainable.

    The public ``tf.keras.layers.BatchNormalization`` class is matched with
    ``isinstance`` rather than comparing against a private module path.
    Layers nested inside sub-models are not visited.

    Args:
        model: object exposing a ``layers`` sequence (e.g. a Keras model).

    Returns:
        None; the layers are modified in place.
    """
    for layer in model.layers:
        if isinstance(layer , tf.keras.layers.BatchNormalization):
            layer.trainable = False

So we won't train the bottom 100 layers. Let's define a function to set these layers to non-trainable.

In [None]:
def set_non_train_layers(model , n=100):
    """Freeze the first ``n`` layers of ``model``.

    Args:
        model: object exposing a ``layers`` sequence (e.g. a Keras model).
        n: how many leading layers to mark non-trainable. Defaults to 100,
            the value that was previously hard-coded. If the model has fewer
            than ``n`` layers, all of them are frozen.

    Returns:
        None; the layers are modified in place.
    """
    for layer in model.layers[:n]:
        layer.trainable = False

Now let's create our fine-tuning model function.

In [None]:
def trainable_model(IMG_DIMS , CHANNELS):
    """Build the fine-tuning classifier: partially unfrozen MobileNetV2 + small head.

    The base model's leading layers (see ``set_non_train_layers``) and all of
    its BatchNormalization layers (see ``freeze_bn_layers``) stay frozen; the
    remaining base layers and the head are trainable.

    Args:
        IMG_DIMS: side length of the square input images.
        CHANNELS: number of input channels. Callers pass 3.
            NOTE(review): the ImageNet weights are expected to need 3-channel
            input - confirm before passing another value.

    Returns:
        A compiled Keras model with a single sigmoid output, trained with
        binary cross-entropy and reporting ``accuracy`` and ``auc``.
    """
    in_put = Input(shape=(IMG_DIMS , IMG_DIMS , CHANNELS))
    # scale raw pixels the way MobileNetV2 expects
    pre = tf.keras.applications.mobilenet_v2.preprocess_input(in_put)
    # pre-trained base model, unfrozen and then selectively re-frozen below
    base_model = MobileNetV2(input_shape=(IMG_DIMS , IMG_DIMS , CHANNELS) , include_top=False , weights='imagenet')
    base_model.trainable = True
    # set non trainable layers in the model
    set_non_train_layers(base_model)
    # freeze bn layers
    freeze_bn_layers(base_model)
    x = base_model(pre)
    # top trainable model layers
    x = GlobalAveragePooling2D()(x)
    x = Dense(128 , activation = 'relu')(x)
    x = Dropout(0.3)(x)
    x = Dense(1 , activation = 'sigmoid')(x)
    model = Model(inputs=in_put , outputs=x)
    # Name the AUC metric explicitly: auto-generated names can get a numeric
    # suffix (auc_1, auc_2, ...) on later instances, which would no longer
    # match the 'val_auc' monitored by the callbacks.
    model.compile(optimizer='adam' , loss='binary_crossentropy' ,
                  metrics=['accuracy' , tf.keras.metrics.AUC(name='auc')])
    return model

To train a deep network we should use a learning-rate schedule, which tunes the learning rate of our model during training and also speeds training up, so let's define one. This schedule is copied from Chris Deotte's notebook on stratified K-fold with TFRecords.


In [None]:
def lr_schedule(batch_size= 16):
    """Create a LearningRateScheduler callback: linear warm-up, then exponential decay.

    The peak rate is ``0.00000125 * REPLICAS * batch_size``. The rate climbs
    linearly from the start value to the peak over the ramp epochs, is held
    for the sustain epochs (currently zero), and then decays geometrically
    towards the minimum rate.

    Args:
        batch_size: per-replica batch size used to scale the peak rate.

    Returns:
        A ``tf.keras.callbacks.LearningRateScheduler`` wrapping the schedule.
    """
    lr_start = 0.000005
    lr_max = 0.00000125 * REPLICAS * batch_size
    lr_min = 0.000001
    ramp_epochs = 5
    hold_epochs = 0
    decay_rate = 0.8

    def lrfn(epoch):
        # warm-up phase
        if epoch < ramp_epochs:
            return (lr_max - lr_start) / ramp_epochs * epoch + lr_start
        # sustain phase
        if epoch < ramp_epochs + hold_epochs:
            return lr_max
        # decay phase
        return (lr_max - lr_min) * decay_rate**(epoch - ramp_epochs - hold_epochs) + lr_min

    return tf.keras.callbacks.LearningRateScheduler(lrfn, verbose=False)

Now let's retrain this model.

In [None]:
# K-fold cross-validation over the TFRecord *files* for the fine-tuning model:
# each fold trains a fresh model with the warm-up/decay learning-rate schedule.
kf = KFold(n_splits=SPLITS)
oof_hist = []  # one Keras History per fold
oof_val = []   # the validation dataset used by each fold
for f , (idxT , idxV) in enumerate(kf.split(train_datasets)):
    train = [train_datasets[idx] for idx in idxT]
    val = [train_datasets[idx] for idx in idxV]

    # Drop the previous fold's Keras state before building a new model. This
    # also resets auto-generated layer/metric names between folds.
    tf.keras.backend.clear_session()

    # Build the datasets explicitly at the configured size/batch so they stay
    # consistent with IMG_DIMS (model input) and BATCH_SIZE (steps_per_epoch).
    train_ds = _get_ds(train , img_dims=IMG_DIMS , batch_size=BATCH_SIZE)
    # finite (repeat=False) so validation and any later use terminate
    val_ds = _get_ds(val , train=False , repeat=False , img_dims=IMG_DIMS , batch_size=BATCH_SIZE)

    # instantiate model
    with strategy.scope():
        # lr callback
        lr_callback = lr_schedule(BATCH_SIZE)
        # Keep only the best checkpoint (by validation AUC) for this fold.
        # NOTE: these file names are the same ones the feature-extractor loop writes.
        cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath='inf_model_fold_'+str(f)+'.hdf5' ,
                                                         monitor='val_auc',
                                                         mode='max',
                                                         save_best_only =True,
                                                         verbose = 1 )
        # early stopping
        es_callback = tf.keras.callbacks.EarlyStopping(monitor='val_auc' , patience=5 , mode='max' )
        model = trainable_model(IMG_DIMS , CHANNELS)
        history = model.fit(train_ds ,
                            epochs = 20 ,
                            # whole number of global batches per epoch
                            steps_per_epoch = int(count_data_items(train) // (BATCH_SIZE * REPLICAS)),
                            validation_data=val_ds,
                            callbacks = [lr_callback , cp_callback , es_callback],
                            verbose = 0)
    oof_hist.append(history)
    oof_val.append(val_ds)
    

In [None]:
# Training vs validation loss of the fine-tuned models, one subplot per fold.
fig = plt.figure(figsize=(10,10))
# three columns; at least two rows, more if there are more than six folds
n_rows = max(2 , -(-len(oof_hist) // 3))
for fold_no , fold_hist in enumerate(oof_hist , start=1):
    ax = fig.add_subplot(n_rows , 3 , fold_no)
    ax.plot(fold_hist.history['loss'] , label='loss')
    ax.plot(fold_hist.history['val_loss'] , label='val_loss')
    ax.set_title('fold '+str(fold_no)+ ' LOSS')
    ax.set_xlabel('epoch')
    # legend on every subplot, not only the last one drawn
    ax.legend()

fig.tight_layout(pad=3)
plt.show()

In [None]:
# Training vs validation AUC of the fine-tuned models, one subplot per fold.
fig = plt.figure(figsize=(10,10))
# three columns; at least two rows, more if there are more than six folds
n_rows = max(2 , -(-len(oof_hist) // 3))
for fold_no , fold_hist in enumerate(oof_hist , start=1):
    # The metric may have been recorded with a numeric suffix (auc_1, ...);
    # pick whichever training key starts with 'auc', falling back to 'auc'.
    auc_key = next((k for k in fold_hist.history if k.startswith('auc')) , 'auc')
    ax = fig.add_subplot(n_rows , 3 , fold_no)
    ax.plot(fold_hist.history[auc_key] , label='auc')
    ax.plot(fold_hist.history['val_' + auc_key] , label='val_auc')
    ax.set_title('fold '+str(fold_no) + ' AUC')
    ax.set_xlabel('epoch')
    # legend on every subplot, not only the last one drawn
    ax.legend()

fig.tight_layout(pad=3)
plt.show()

In [None]:
# Rebuild the fine-tuning architecture so saved weights can be loaded into it.
best_model2 = trainable_model(IMG_DIMS , CHANNELS)

In [None]:
# Only the fold-0 checkpoint is loaded. NOTE(review): both training loops write to the same
# 'inf_model_fold_<k>.hdf5' names, so this file holds whichever run saved it last.
best_model2.load_weights('inf_model_fold_0.hdf5')

In [None]:
# Score the test set with the fine-tuned model and overwrite the 'target' column
# of the submission frame that the earlier cells already prepared.
preds2 = best_model2.predict(test)
sub_df['target'] = preds2
sub_df.head()

In [None]:
# Relies on the earlier submission cell having made 'image_name' the index, so it is written as the first column.
sub_df.to_csv('submission2.csv')