In [None]:
# Install libraries
!pip install '../input/offline-packages/Keras_Applications-1.0.8-py3-none-any.whl'
!pip install '../input/offline-packages/efficientnet-1.1.1-py3-none-any.whl'

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
import efficientnet.tfkeras as efn

import os
# Sanity check: list the contents of the competition input directory.
filenames = os.listdir('../input/petfinder-pawpularity-score')
print(filenames)

## Config

In [None]:
# --- Hyperparameters -------------------------------------------------------
Q = 30               # number of quantile bins used to build the stratification label
feature_folds = 5    # number of CV folds (one model is trained and saved per fold)
batch_size = 16
epochs  = 10
seed  = 4261
verbose = 1
LR  = 0.0005         # initial learning rate handed to the Adam optimizer
CHANNELS = 3         # colour channels requested when decoding the JPEGs
IMG_SIZE = 384       # images are resized to IMG_SIZE x IMG_SIZE
# Let tf.data pick the degree of parallelism / prefetch depth
AUTOTUNE = tf.data.experimental.AUTOTUNE  

# Seed the global NumPy and TensorFlow generators so that shuffling, augmentation,
# dropout and weight initialisation are repeatable across "Restart & Run All".
# (Bit-exact determinism on accelerators is still not guaranteed.)
np.random.seed(seed)
tf.random.set_seed(seed)

# --- Paths and metadata ----------------------------------------------------
root_dir = '../input/petfinder-pawpularity-score/'
train_meta = pd.read_csv(root_dir + 'train.csv')
test_meta = pd.read_csv(root_dir + 'test.csv')
train_dir = root_dir + 'train/'
test_dir = root_dir + 'test/'

In [None]:
# Peek at the raw training metadata before any columns are added or rewritten.
train_meta.head()

In [None]:
# Turn the bare image ids into full JPEG paths.
# Guarded so that re-running this cell does not prepend the directory (and append
# the extension) a second time, which would silently break every image path.
if not train_meta['Id'].str.startswith(train_dir).all():
    train_meta['Id'] = train_meta['Id'].apply(lambda x: train_dir + x + '.jpg')

# Set a specific label to be able to perform stratification:
# Pawpularity is cut into Q quantile bins labelled 0 .. Q-1.
train_meta['stratify_label'] = pd.qcut(train_meta['Pawpularity'], q = Q, labels = range(Q))

# Label value to be used for feature model 'classification' training:
# the score divided by 100, matching the sigmoid output of the model.
train_meta['target_value'] = train_meta['Pawpularity'] / 100.

# Summary
print('train_meta:{}'.format(train_meta.shape))
train_meta.head()

In [None]:
# Turn the bare image ids into full JPEG paths.
# Guarded so that re-running this cell does not rewrite already-expanded paths.
if not test_meta['Id'].str.startswith(test_dir).all():
    test_meta['Id'] = test_meta['Id'].apply(lambda x: test_dir + x + '.jpg')

# Placeholder score column; the unlabelled test pipeline only reads 'Id'.
test_meta['Pawpularity'] = 0

print('test_meta:{}'.format(test_meta.shape))
test_meta.head()

## Define strategy

In [None]:
# Probe for a TPU and pick the matching tf.distribute strategy.
try:
    # The resolver needs no arguments when the TPU_NAME environment variable is set (always the case on Kaggle).
    # On Google Colab, build tpu_address = 'grpc://' + os.environ['COLAB_TPU_ADDR'] and pass it to the resolver instead.
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver() 
    print('Running on TPU ', tpu.master())
except ValueError:
    # No TPU could be resolved
    tpu = None

if not tpu:
    # Default TensorFlow strategy: works on CPU and on a single GPU.
    strategy = tf.distribute.get_strategy()
    # For GPU or multi-GPU machines, swap in the line below:
    #strategy = tf.distribute.MirroredStrategy()
else:
    # Connect to the TPU cluster and initialise it before creating the strategy
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)

print("REPLICAS: ", strategy.num_replicas_in_sync)

## Create TF Dataset

In [None]:
def build_augmenter(is_labelled):
    """Return a tf.data-compatible map function applying light random augmentation.

    Args:
        is_labelled: when truthy the returned function has the signature
            ``(img, label) -> (img, label)`` and passes the label through
            untouched; otherwise it is ``img -> img``.

    Returns:
        The augmentation callable matching the dataset element structure.
    """
    def augment(img):
        # Only use basic augmentations...too much augmentation hurts performance
        img = tf.image.random_flip_left_right(img)
        img = tf.image.random_flip_up_down(img)
        img = tf.image.random_saturation(img, 0.95, 1.05)
        img = tf.image.random_brightness(img, 0.05)
        img = tf.image.random_contrast(img, 0.95, 1.05)
        img = tf.image.random_hue(img, 0.05)

        # Brightness / contrast jitter can push pixels slightly outside the
        # [0, 1] range the decoder produces (it divides the decoded JPEG by
        # 255); clip so augmented and non-augmented images share one range.
        img = tf.clip_by_value(img, 0.0, 1.0)

        return img
    
    def augment_with_labels(img, label):
        return augment(img), label
    
    return augment_with_labels if is_labelled else augment

def build_decoder(is_labelled):
    """Return a tf.data-compatible map function that loads an image from its path.

    The image is read from disk, decoded as a JPEG with ``CHANNELS`` channels,
    scaled to floats by dividing by 255 and resized to ``IMG_SIZE`` x ``IMG_SIZE``.

    Args:
        is_labelled: when truthy the returned function has the signature
            ``(path, label) -> (img, label)``; otherwise it is ``path -> img``.
    """
    def decode(path):
        # Load and decode the JPEG
        raw_jpeg = tf.io.read_file(path)
        pixels = tf.image.decode_jpeg(raw_jpeg, channels = CHANNELS)

        # Scale to float, then bring every image to the common input size
        scaled = tf.cast(pixels, tf.float32) / 255.0
        return tf.image.resize(scaled, (IMG_SIZE, IMG_SIZE))

    if not is_labelled:
        return decode

    def decode_with_labels(path, label):
        # The label is forwarded unchanged next to the decoded image
        return decode(path), label

    return decode_with_labels

def create_dataset(df, batch_size = 32, is_labelled = False, augment = False, repeat = False, shuffle = False):
    """Build a batched, prefetched ``tf.data`` pipeline from a metadata frame.

    Args:
        df: DataFrame with an ``'Id'`` column of image paths and, when
            ``is_labelled`` is set, a ``'target_value'`` column.
        batch_size: number of elements per batch.
        is_labelled: yield ``(image, target)`` pairs instead of images only.
        augment: apply the random augmentations from ``build_augmenter``.
        repeat: repeat the dataset indefinitely.
        shuffle: reshuffle the records on every pass over the data.

    Returns:
        A ``tf.data.Dataset`` yielding batches of decoded images
        (plus targets when ``is_labelled``).
    """
    decode_fn = build_decoder(is_labelled)
    augmenter_fn = build_augmenter(is_labelled)
    
    # Create Dataset
    if is_labelled:
        dataset = tf.data.Dataset.from_tensor_slices((df['Id'].values, df['target_value'].values))
    else:
        dataset = tf.data.Dataset.from_tensor_slices((df['Id'].values))

    # Shuffle the lightweight path/target records BEFORE decoding: the buffer
    # then holds short strings instead of decoded float images, so it can cover
    # the whole frame (a uniform shuffle) at negligible memory cost. Shuffling
    # before repeat() also keeps every pass a complete, non-overlapping
    # permutation of the data.
    if shuffle:
        dataset = dataset.shuffle(max(len(df), 1), reshuffle_each_iteration = True)

    dataset = dataset.map(decode_fn, num_parallel_calls = AUTOTUNE)
    if augment:
        dataset = dataset.map(augmenter_fn, num_parallel_calls = AUTOTUNE)
    if repeat:
        # Placed after the (random) augmentation map so each pass is re-augmented
        dataset = dataset.repeat()
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(AUTOTUNE)
    
    return dataset

## Define Model

In [None]:
def unfreeze_model(model):
    """Mark every layer of ``model`` trainable, except BatchNormalization layers.

    Each entry of ``model.layers`` has its ``trainable`` flag overwritten in
    place: ``False`` for BatchNormalization layers, ``True`` for all others.
    """
    for layer in model.layers:
        layer.trainable = not isinstance(layer, tf.keras.layers.BatchNormalization)

def create_model(): 
    """Create and compile the EfficientNetB2 regression model.

    The backbone is built without its classification top, with global average
    pooling, and initialised from the local noisy-student weight file. A
    dropout layer and a single sigmoid unit are stacked on top, so the output
    lies in (0, 1) like the ``target_value`` column (Pawpularity / 100).

    The model is built and compiled inside ``strategy.scope()`` so that its
    variables, optimizer state and metrics are created under the distribution
    strategy selected earlier in the notebook. For the default strategy this
    scope is a no-op.

    Returns:
        A compiled ``tf.keras.Model`` (Adam, binary cross-entropy loss,
        RMSE metric reported under the name ``'rmse'``).
    """
    with strategy.scope():
        effnet_model = efn.EfficientNetB2(include_top = False, 
                                          classes = None, 
                                          input_shape = (IMG_SIZE, IMG_SIZE, CHANNELS), 
                                          weights = '../input/weights/efficientnet-b2_noisy-student_notop.h5',
                                          pooling = 'avg')

        # Set all layers to Trainable except BN layers
        unfreeze_model(effnet_model)

        X = tf.keras.layers.Dropout(0.25)(effnet_model.output)
        output = tf.keras.layers.Dense(1, activation = 'sigmoid')(X)

        # Create Final Model
        model = tf.keras.Model(inputs = effnet_model.input, outputs = output)

        # Compile. The metric name 'rmse' is what makes 'val_rmse' available
        # to callbacks during validation.
        model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate = LR), 
                      loss = tf.keras.losses.BinaryCrossentropy(), 
                      metrics = [tf.keras.metrics.RootMeanSquaredError('rmse')])        
    
    return model

In [None]:
# Single-pass, un-shuffled dataset over the full training frame (augmentation on).
preview_frame = train_meta
training_dataset = create_dataset(
    preview_frame,
    batch_size = batch_size,
    is_labelled = True,
    augment = True,
    repeat = False,
    shuffle = False,
)

## Define callbacks

In [None]:
from tensorflow.keras import losses, optimizers , metrics
from tensorflow.keras import callbacks

def get_lr_callback(batch_size=8):
    """Build a LearningRateScheduler with linear warm-up followed by exponential decay.

    The rate starts at 5e-6, rises linearly over the first 5 epochs towards a
    peak of ``1.25e-6 * batch_size``, is optionally held there (0 epochs here),
    and then decays towards a floor of 1e-6 by a factor of 0.8 per epoch.

    Args:
        batch_size: batch size used to scale the peak learning rate.

    Returns:
        A ``callbacks.LearningRateScheduler`` created with ``verbose=True``.
    """
    start_lr = 0.000005
    peak_lr = 0.00000125 * batch_size
    floor_lr = 0.000001
    ramp_epochs = 5
    sustain_epochs = 0
    decay_rate = 0.8

    def schedule(epoch):
        # Warm-up: straight line from start_lr towards peak_lr
        if epoch < ramp_epochs:
            return (peak_lr - start_lr) / ramp_epochs * epoch + start_lr
        # Plateau at the peak (empty while sustain_epochs == 0)
        if epoch < ramp_epochs + sustain_epochs:
            return peak_lr
        # Exponential decay of the part above the floor
        return (peak_lr - floor_lr) * decay_rate**(epoch - ramp_epochs - sustain_epochs) + floor_lr

    return callbacks.LearningRateScheduler(schedule, verbose=True)

def model_callback(fold):
    """Return the list of Keras callbacks used when training one fold.

    Args:
        fold: fold index; embedded in the checkpoint file name.

    Returns:
        ``[checkpoint, lr_scheduler]`` where the checkpoint keeps only the
        weights of the epoch with the lowest ``val_rmse`` and the scheduler
        comes from ``get_lr_callback`` with the notebook-level ``batch_size``.
    """
    checkpoint_path = f'feature_model_{fold}.h5'
    best_weights_saver = tf.keras.callbacks.ModelCheckpoint(
        checkpoint_path,
        monitor = 'val_rmse',
        mode = 'min',
        save_best_only = True,
        save_weights_only = True,
        verbose = 1,
    )
    lr_scheduler = get_lr_callback(batch_size)

    return [best_weights_saver, lr_scheduler]

## Check images in dataset

In [None]:
# Pull one batch from the dataset and discard its labels
sample_images, _ = next(iter(training_dataset))

import matplotlib.pyplot as plt 

# 2 x 3 grid: one panel for each of the first six images of the batch
fig, axes = plt.subplots(2, 3, figsize=(16, 10))
for ax in axes.ravel():
    # Hide the frame/ticks everywhere, including panels left empty by a short batch
    ax.axis("off")

for ax, image in zip(axes.ravel(), sample_images[:6]):
    print(image.shape)
    ax.set_title("Input Image")
    # Clip to [0, 1] for display only: augmented pixels may fall slightly
    # outside the range imshow accepts for float RGB data.
    ax.imshow(np.clip(image.numpy().squeeze(), 0.0, 1.0))

## Create and train models

In [None]:
import gc
from sklearn.model_selection import StratifiedKFold

# Best validation RMSE of every fold (measured on the 0-1 scaled target)
all_val_loss = []
kfold = StratifiedKFold(n_splits = feature_folds, 
                        shuffle = True, random_state = seed)
for fold, (train_index, val_index) in enumerate(kfold.split(train_meta.index,
                                                            train_meta['stratify_label'])):
    print(f'\nFold {fold}\n')
    # Pre model.fit cleanup: drop the previous fold's Keras state and free memory
    tf.keras.backend.clear_session()
    gc.collect()

    # Create Model
    model = create_model()

    # Create TF Datasets
    trn = train_meta.iloc[train_index]
    val = train_meta.iloc[val_index]
    training_dataset = create_dataset(trn, 
                                      batch_size  = batch_size, 
                                      is_labelled = True, 
                                      augment     = True, 
                                      repeat      = True, 
                                      shuffle     = True)
    # The validation set is finite (no repeat) and is consumed in full each
    # epoch, so the samples of the last partial batch are no longer dropped
    # from val_rmse by a floor-divided validation_steps.
    validation_dataset = create_dataset(val, 
                                        batch_size  = batch_size, 
                                        is_labelled = True,
                                        augment     = False, 
                                        repeat      = False,
                                        shuffle     = False)
    # Fit Model (the training dataset repeats, hence the explicit steps_per_epoch)
    history = model.fit(training_dataset,
                        epochs = epochs,
                        steps_per_epoch  = trn.shape[0] // batch_size,
                        callbacks = model_callback(fold),
                        validation_data = validation_dataset,
                        verbose = 1)   

    # Validation Information: best epoch of this fold, i.e. the one whose
    # weights the checkpoint callback kept on disk
    best_val_loss = min(history.history['val_rmse'])
    all_val_loss.append(best_val_loss)
    print(f'\nValidation RMSE: {best_val_loss}\n')

print(f'\nMean validation RMSE over {len(all_val_loss)} folds: {np.mean(all_val_loss)}\n')

## Create submission file
We average the predictions of all 5 fold models to obtain the final prediction.

In [None]:
submission_df = pd.read_csv('{}sample_submission.csv'.format(root_dir))

# The test pipeline is identical for every fold, so it is built only once.
cb_test_set = create_dataset(test_meta, 
                             batch_size  = batch_size,
                             is_labelled = False,
                             repeat      = False, 
                             shuffle     = False)

# Running sum of the per-fold predictions, rescaled from (0, 1) to the 0-100 score range
pred = 0

for fold_index in range(feature_folds):
    model = create_model()
    model.load_weights('feature_model_{}.h5'.format(fold_index))

    # predict() returns shape (N, 1); flatten to (N,) so it maps onto a single column
    pred = pred + model.predict(cb_test_set).ravel() * 100

# Average over the number of models actually used rather than a hard-coded 5
submission_df['Pawpularity'] = pred / feature_folds
submission_df.to_csv('submission.csv', index = False)
submission_df.head(10)