---
## [PetFinder.my - Pawpularity Contest][1]
---
**Comments**: Thanks to the following great earlier notebook.

[Vision Transformer (ViT) Fine-tuning][2]

[1]: https://www.kaggle.com/c/petfinder-pawpularity-score
[2]: https://www.kaggle.com/raufmomin/vision-transformer-vit-fine-tuning

# 0. Settings

In [None]:
# Import dependencies 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
%matplotlib inline

import os
import pathlib
import gc
import sys
import math 
import time 
import tqdm 
from tqdm import tqdm 
import random

import warnings
warnings.filterwarnings('ignore')

import tensorflow as tf
import tensorflow.keras as keras
import tensorflow_hub as hub
from tensorflow.keras.layers.experimental import preprocessing

from sklearn.model_selection import KFold 
from sklearn.model_selection import StratifiedKFold 

In [None]:
# Global configuration shared by the cells below.
# To use a different pre-trained model from TensorFlow Hub, change 'model_1_url'.
config = {
    # Locations of the competition data and of the offline pre-trained models.
    'data_path': '../input/petfinder-pawpularity-score',
    'model_1_path': '../input/effnet-v2-s-feature-vector',
    'model_2_path': '../input/vit-l32',
    'model_3_path': '../input/keras-xception',
    'input_path': '../input', 
    'output_path': './',
    # Only referenced by the commented-out TF-Hub download variant of Model 1
    # in the code visible here.
    'model_1_url': "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_s/feature_vector/2",
    # Number of KFold splits used when assigning the 'fold' column.
    'nfolds': 10,
    # Default batch size of ImageSequence / TestImageSequence.
    'batch_size': 16,
    # NOTE(review): 'learning_rate', 'num_epochs' and 'blend_weight' are not
    # read anywhere in the code visible here; the training cells hard-code
    # their own values. Confirm before relying on them.
    'learning_rate': 1e-4,
    'num_epochs': 10,
    # Size passed to tf.image.resize when images are loaded.
    'image_size': (384, 384),
    # Shape passed to keras.Input for all three models.
    'input_shape': (384, 384, 3),
    'blend_weight': 1/3,
}

# NOTE(review): not referenced in the code visible here.
AUTOTUNE = tf.data.experimental.AUTOTUNE

# For reproducible results
def seed_all(s):
    """Seed the Python, NumPy and TensorFlow RNGs with `s`.

    Also exports TF_CUDNN_DETERMINISTIC=1 and PYTHONHASHSEED=<s> into the
    process environment.
    """
    # Same seeding order as before: stdlib random, NumPy, then TensorFlow.
    for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
        seed_fn(s)
    os.environ.update({
        'TF_CUDNN_DETERMINISTIC': '1',
        'PYTHONHASHSEED': str(s),
    })


global_seed = 42
seed_all(global_seed)

# 1. DataFrame Preprocessing

In [None]:
# Resolve the dataset locations from the global config.
data_folder = config['data_path']
train_folder = os.path.join(data_folder, 'train')
test_folder = os.path.join(data_folder, 'test')
sample_submission_path = data_folder + '/sample_submission.csv'

# Load the train/test metadata tables and print their shapes.
train_df = pd.read_csv(os.path.join(data_folder, 'train.csv'))
print(train_df.shape)
test_df = pd.read_csv(os.path.join(data_folder, 'test.csv'))
print(test_df.shape)
# Disabled: loading the sample submission file.
#sample_df = pd.read_csv(sample_submission_path)
#print(sample_df.shape)

# Disabled: listing the image files found in the train/test folders.
#train_path = pathlib.Path(train_folder); print(train_path)
#train_photo_list = list(train_path.iterdir()); print(len(train_photo_list))
#test_path = pathlib.Path(test_folder); print(test_path)
#test_photo_list = list(test_path.iterdir()); print(len(test_photo_list))

# Notebook display of the loaded training table.
train_df

In [None]:
# Pawpularity Scaling: divide the target by its maximum so the models regress
# on a value in (0, 1]. `scaler` is reused at prediction time to undo this.
scaler = train_df['Pawpularity'].max()
train_df['Pawpularity_scaled'] = train_df['Pawpularity'] / scaler

# add 'Path' column: <train_folder>/<Id>.jpg for every row.
# (A comprehension avoids leaving a loop variable named `id` behind, which
# would shadow the builtin for the rest of the notebook.)
path_list = [os.path.join(train_folder, img_id) + '.jpg'
             for img_id in train_df['Id']]
train_df['Path'] = path_list

# Data Shuffling (uses the NumPy global RNG seeded by seed_all).
train_df_shuffled = train_df.iloc[np.random.permutation(train_df.index)].reset_index(drop=True)

# split validation data: rows are already shuffled, so an unshuffled KFold
# assigns each contiguous chunk of rows to one fold id.
kf = KFold(n_splits=config['nfolds'])
for nfold, (_, val_index) in enumerate(kf.split(train_df_shuffled)):
    train_df_shuffled.loc[val_index, 'fold'] = nfold
print(train_df_shuffled.groupby(['fold', train_df_shuffled.fold]).size())
print()

# Disabled alternative: stratified folds on a `target` column.
#skf = StratifiedKFold(n_splits=config['nfolds'], shuffle=True, random_state=global_seed)
#for nfold, (train_index, val_index) in enumerate(skf.split(X=train_df.index,
#                                                           y=train_df.target)):
#    train_df.loc[val_index, 'fold'] = nfold
#print(train_df.groupby(['fold', train_df.target]).size())

# Notebook display of the shuffled, fold-annotated table.
train_df_shuffled

In [None]:
# Build the train/validation frames for folds 0, 1 and 2; each of the three
# models below is trained on one of these splits.
p_trains = []
p_valids = []
for p in range(3):
    p_fold = p
    # Rows outside fold `p_fold` form the training split, rows inside it the
    # validation split; both get a fresh 0..n-1 index.
    p_train = train_df_shuffled.query(f'fold != {p_fold}').reset_index(drop=True)
    p_valid = train_df_shuffled.query(f'fold == {p_fold}').reset_index(drop=True)
    p_trains.append(p_train)
    p_valids.append(p_valid)
    # Print summary statistics of the raw (unscaled) target for both splits.
    print('-'*30)
    print(f'train-{p}\n', p_train.Pawpularity.describe())
    print()
    print(f'valid-{p}\n', p_valid.Pawpularity.describe())
    print()

# 2. DataGenerator

In [None]:
@tf.function
def preprocessing_img(img):
    """Return `img` with a leading batch axis, resized to config['image_size'].

    No pixel rescaling is applied here; the models contain their own
    Rescaling(1/255) layer.
    """
    batched = tf.expand_dims(img, axis=0)
    return tf.image.resize(batched, config['image_size'])

def load_and_preprocessing_img(path_list):
    """Load the image files in `path_list` and stack them into one batch.

    Args:
        path_list: iterable of image file paths.

    Returns:
        A tensor produced by concatenating, along axis 0, the output of
        `preprocessing_img` for each file (one entry per path, in order).
    """
    img_list = []
    for path in path_list:
        img_raw = tf.io.read_file(path)
        # Force 3 channels so grayscale / RGBA files still match the RGB
        # model input, and disable animation expansion so the decoded tensor
        # is always a single 3-D frame that can be resized and concatenated.
        img_tensor = tf.image.decode_image(
            img_raw, channels=3, expand_animations=False)
        img_list.append(preprocessing_img(img_tensor))
    return tf.concat(img_list, axis=0)

class ImageSequence(keras.utils.Sequence):
    """Keras Sequence yielding (image_batch, scaled_target_batch) pairs.

    Args:
        df: DataFrame with a 'Path' column (image file paths) and a
            'Pawpularity_scaled' column (regression targets).
        batch_size: number of rows per batch.
        mode: 'train' drops the final partial batch so every training batch
            has exactly `batch_size` rows; any other value (e.g. 'valid')
            keeps it so that every row is evaluated.
    """

    def __init__(self, df, batch_size=config['batch_size'], mode='train'):
        super().__init__()
        self.l = None  # cached number of batches, set by __len__
        self.x = df.Path
        self.y = df.Pawpularity_scaled
        self.batch_size = batch_size
        self.num_samples = len(df)
        self.mode = mode

    def __len__(self):
        """Return the number of batches per epoch for the configured mode."""
        if self.mode == 'train':
            self.l = self.num_samples // self.batch_size
        else:
            # Ceil division: include the trailing partial batch so that
            # validation metrics are computed over all rows.
            self.l = (self.num_samples + self.batch_size - 1) // self.batch_size
        return self.l

    def __getitem__(self, idx):
        """Return batch number `idx` as (images, targets)."""
        start = idx * self.batch_size
        stop = start + self.batch_size
        # .iloc slices by position regardless of the frame's index labels and
        # clamps at the end of the data for the last (possibly short) batch.
        batch_x = self.x.iloc[start:stop]
        batch_y = self.y.iloc[start:stop]

        batch_x_img = load_and_preprocessing_img(batch_x)
        return batch_x_img, np.array(batch_y)


In [None]:
# One (train, valid) generator pair per model, built from the splits of
# folds 0, 1 and 2.
train_gens = []
valid_gens = []

for p in range(3):
    train_gen = ImageSequence(p_trains[p], mode='train')
    valid_gen = ImageSequence(p_valids[p], mode='valid')
    
    train_gens.append(train_gen)
    valid_gens.append(valid_gen)
    
    print('-'*30)
    print(f'train_gen_{p+1} length', len(train_gen))
    print(f'valid_gen_{p+1} length', len(valid_gen))
    
    # Pull the first batch as a smoke test and print the image / target shapes.
    sample = next(iter(train_gen))
    print(sample[0].shape)
    print(sample[1].shape)

# 3. Model Training

### 3.1 Model_1 (EfficientNet V2 trained on imagenet-21k)

In [None]:
"""
# Downloading models from TensorFlow Hub (Internet access must be available).
base_model_1 = tf.keras.Sequential([
    hub.KerasLayer(config['model_1_url'], trainable=False),
    tf.keras.layers.Dense(512, activation='selu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(128, activation='selu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(1, activation=None)
])
"""

# Model 1: the pre-trained model loaded from config['model_1_path'], followed
# by a small dense head ending in a single linear output.
base_model_1 = tf.keras.Sequential([
    tf.keras.models.load_model(config['model_1_path']),
    tf.keras.layers.Dense(512, activation='selu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(128, activation='selu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(1, activation=None)
])

# Random flip / rotation / zoom layers applied in front of the model.
data_augmentation = tf.keras.models.Sequential([
    preprocessing.RandomFlip('horizontal'),
    preprocessing.RandomRotation(0.1),
    preprocessing.RandomZoom(0.1),
])

# Full pipeline: augmentation -> resize to 384x384 (same as the configured
# input shape) -> scale pixels by 1/255 -> base model.
inputs_1 = keras.Input(shape=config['input_shape'])
x_1 = data_augmentation(inputs_1)
x_1 = tf.keras.layers.Resizing(384, 384)(x_1)
x_1 = tf.keras.layers.Rescaling(1. / 255)(x_1)
outputs_1 = base_model_1(x_1)
model_1 = keras.Model(inputs_1, outputs_1)

model_1.summary()

In [None]:
# First training stage for Model 1: MSE loss on the scaled target, RMSE as the
# reported metric, Adam at lr=1e-3.
model_1.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
                loss='mean_squared_error',
                metrics=[tf.keras.metrics.RootMeanSquaredError(name="rmse")])

# Model 1 uses the fold-0 split.
train_gen = train_gens[0]
valid_gen = valid_gens[0]

# `Model.fit` accepts a `keras.utils.Sequence` directly; the deprecated
# `fit_generator` wrapper is no longer needed.
fit_history_1 = model_1.fit(train_gen, epochs=5,
                            steps_per_epoch=len(train_gen),
                            verbose=1,
                            validation_data=valid_gen,
                            validation_steps=len(valid_gen))

In [None]:
# Finetuning
# Mark every top-level layer of model_1 as trainable, then recompile so the
# change takes effect, using a lower learning rate (1e-4) than the first stage.
for l in model_1.layers:
    l.trainable = True
    
model_1.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
                loss='mean_squared_error',
                metrics=[tf.keras.metrics.RootMeanSquaredError(name="rmse")])

model_1.summary()

In [None]:
# Fine-tuning run for Model 1 (2 epochs). Note that this rebinds
# `fit_history_1`, replacing the history of the first stage.
# `Model.fit` accepts a `keras.utils.Sequence` directly; the deprecated
# `fit_generator` wrapper is no longer needed.
fit_history_1 = model_1.fit(train_gen, epochs=2,
                            steps_per_epoch=len(train_gen),
                            verbose=1,
                            validation_data=valid_gen,
                            validation_steps=len(valid_gen))

### 3.2 Model_2 (ViT)

In [None]:
"""
# Downloading models from vit-keras (Internet access must be available).
!pip install vit-keras -q
!pip install tensorflow-addons -q

from vit_keras import vit

vit_model = vit.vit_l32(
    image_size=384,
    pretrained=True,
    include_top=False,
    pretrained_top=False,
)
"""

# Model 2: the pre-trained model loaded from config['model_2_path'], followed
# by a dense head ending in a single linear output.
vit_model = tf.keras.models.load_model(config['model_2_path'])

base_model_2 = tf.keras.Sequential([
    vit_model,
    tf.keras.layers.Dense(256, activation='selu'),
    tf.keras.layers.Dropout(0.4),
    tf.keras.layers.Dense(1, activation=None)
])

# Random flip / rotation / zoom layers applied in front of the model
# (this rebinds the `data_augmentation` name used for Model 1).
data_augmentation = tf.keras.models.Sequential([
    preprocessing.RandomFlip('horizontal'),
    preprocessing.RandomRotation(0.1),
    preprocessing.RandomZoom(0.1),
])

# Full pipeline: augmentation -> resize to 384x384 -> scale pixels by 1/255
# -> base model.
inputs_2 = keras.Input(shape=config['input_shape'])
x_2 = data_augmentation(inputs_2)
x_2 = tf.keras.layers.Resizing(384, 384)(x_2)
x_2 = tf.keras.layers.Rescaling(1. / 255)(x_2)
outputs_2 = base_model_2(x_2)
model_2 = keras.Model(inputs_2, outputs_2)

model_2.summary()

In [None]:
# Stage 1 for Model 2: freeze every layer of the loaded ViT model so only the
# layers stacked on top of it are updated.
for layer in vit_model.layers:
    layer.trainable = False

from tensorflow.keras.optimizers.schedules import PolynomialDecay
from tensorflow.keras.optimizers import Adam

num_epochs = 3
# Model 2 is trained on train_gens[1]. Size the schedule from that generator
# explicitly: when cells run top to bottom, `train_gen` still refers to the
# generator used for Model 1 at this point.
num_train_steps = len(train_gens[1]) * num_epochs

# Learning rate decays from 1e-3 to 1e-4 over the whole stage.
lr_scheduler = PolynomialDecay(
    initial_learning_rate=1e-3, end_learning_rate=1e-4, decay_steps=num_train_steps
)

model_2.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr_scheduler),
                loss='mean_squared_error',
                metrics=[tf.keras.metrics.RootMeanSquaredError(name="rmse")])

model_2.summary()

In [None]:
# Model 2 uses the fold-1 split.
train_gen = train_gens[1]
valid_gen = valid_gens[1]

# `Model.fit` accepts a `keras.utils.Sequence` directly; the deprecated
# `fit_generator` wrapper is no longer needed.
fit_history_2 = model_2.fit(train_gen, epochs=num_epochs,
                            steps_per_epoch=len(train_gen),
                            verbose=1,
                            validation_data=valid_gen,
                            validation_steps=len(valid_gen))

In [None]:
# Finetuning
# Unfreeze every layer of the loaded ViT model.
for layer in vit_model.layers:
    layer.trainable = True

num_epochs = 2
# NOTE(review): relies on `train_gen` still being the generator Model 2 is
# trained on (true when the cells are run in order).
num_train_steps = len(train_gen) * num_epochs

# Learning rate decays from 5e-5 down to 0 over the fine-tuning run.
lr_scheduler = PolynomialDecay(
    initial_learning_rate=5e-5, end_learning_rate=0.0, decay_steps=num_train_steps
)

# Recompile so the new trainable flags and the new schedule take effect.
model_2.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr_scheduler),
              loss='mean_squared_error',
              metrics=[tf.keras.metrics.RootMeanSquaredError(name="rmse")])


model_2.summary()

In [None]:
# Fine-tuning run for Model 2. Note that this rebinds `fit_history_2`,
# replacing the history of the first stage.
# `Model.fit` accepts a `keras.utils.Sequence` directly; the deprecated
# `fit_generator` wrapper is no longer needed.
fit_history_2 = model_2.fit(train_gen, epochs=num_epochs,
                            steps_per_epoch=len(train_gen),
                            verbose=1,
                            validation_data=valid_gen,
                            validation_steps=len(valid_gen))

### 3.3 Model_3 (Xception)

In [None]:
"""
# Downloading models (Internet access must be available).
base_model_3 = tf.keras.Sequential([
    tf.keras.applications.xception.Xception(
        include_top=False, weights='imagenet', 
        input_shape=(299, 299, 3)),
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(512, activation='selu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(128, activation='selu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(1, activation=None)
])
"""
# Model 3: the pre-trained model loaded from config['model_3_path'], followed
# by global average pooling and a dense head ending in a single linear output.
xception_model =  tf.keras.models.load_model(config['model_3_path'])

base_model_3 = tf.keras.Sequential([
    xception_model,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(512, activation='selu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(128, activation='selu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(1, activation=None)
])

# Random flip / rotation / zoom layers applied in front of the model
# (this rebinds the `data_augmentation` name used for the earlier models).
data_augmentation = tf.keras.models.Sequential([
    preprocessing.RandomFlip('horizontal'),
    preprocessing.RandomRotation(0.1),
    preprocessing.RandomZoom(0.1),
])

# Full pipeline: augmentation -> resize from the 384x384 input down to
# 299x299 -> scale pixels by 1/255 -> base model.
inputs_3 = keras.Input(shape=config['input_shape'])
x_3 = data_augmentation(inputs_3)
x_3 = tf.keras.layers.Resizing(299, 299)(x_3)
x_3 = tf.keras.layers.Rescaling(1. / 255)(x_3)
outputs_3 = base_model_3(x_3)
model_3 = keras.Model(inputs_3, outputs_3)

# Freeze every layer of the loaded model for the first training stage.
for l in xception_model.layers:
    l.trainable = False

model_3.summary()

In [None]:
# First training stage for Model 3: MSE loss on the scaled target, RMSE as the
# reported metric, Adam at lr=1e-3.
model_3.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
                loss='mean_squared_error',
                metrics=[tf.keras.metrics.RootMeanSquaredError(name="rmse")])

# Model 3 uses the fold-2 split.
train_gen = train_gens[2]
valid_gen = valid_gens[2]

# `Model.fit` accepts a `keras.utils.Sequence` directly; the deprecated
# `fit_generator` wrapper is no longer needed.
fit_history_3 = model_3.fit(train_gen, epochs=3,
                            steps_per_epoch=len(train_gen),
                            verbose=1,
                            validation_data=valid_gen,
                            validation_steps=len(valid_gen))

In [None]:
# Finetuning
# Mark every top-level layer of model_3 as trainable.
for l in model_3.layers:
    l.trainable = True
    
from tensorflow.keras.optimizers.schedules import PolynomialDecay
from tensorflow.keras.optimizers import Adam

num_epochs = 2
# NOTE(review): relies on `train_gen` still being the generator Model 3 is
# trained on (true when the cells are run in order).
num_train_steps = len(train_gen) * num_epochs

# Learning rate decays from 1e-4 to 1e-5 over the fine-tuning run.
lr_scheduler = PolynomialDecay(
    initial_learning_rate=1e-4, end_learning_rate=1e-5, decay_steps=num_train_steps
)

# Recompile so the new trainable flags and the new schedule take effect.
model_3.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr_scheduler),
              loss='mean_squared_error',
              metrics=[tf.keras.metrics.RootMeanSquaredError(name="rmse")])

model_3.summary()

In [None]:
# Fine-tuning run for Model 3. Note that this rebinds `fit_history_3`,
# replacing the history of the first stage.
# `Model.fit` accepts a `keras.utils.Sequence` directly; the deprecated
# `fit_generator` wrapper is no longer needed.
fit_history_3 = model_3.fit(train_gen, epochs=num_epochs,
                            steps_per_epoch=len(train_gen),
                            verbose=1,
                            validation_data=valid_gen,
                            validation_steps=len(valid_gen))

# 4. Prediction

### 4.1 Test DataGenerator

In [None]:
def preprocess_dataframe(df, mode='train', shuffle=True, nfolds=None):
    """Return a prepared copy of a PetFinder metadata frame.

    The input frame is not modified; all changes are made on a copy.

    Args:
        df: DataFrame with an 'Id' column (and a 'Pawpularity' column when
            ``mode == 'train'``).
        mode: 'train' resolves image paths under `train_folder` and divides
            'Pawpularity' by its maximum (in place of the raw column); any
            other value resolves paths under `test_folder` and leaves the
            other columns untouched.
        shuffle: if truthy, rows are randomly permuted with the NumPy global
            RNG and the index is reset.
        nfolds: if not None, the number of KFold splits; a float 'fold'
            column holding each row's validation-fold id is added.

    Returns:
        The new DataFrame with a 'Path' column (and optionally 'fold').
    """
    # Work on a copy so the caller's frame (e.g. its raw 'Pawpularity'
    # column) is never silently overwritten.
    df = df.copy()

    if mode == 'train':
        img_folder = train_folder

        # Pawpularity Scaling
        df['Pawpularity'] = df['Pawpularity'] / df['Pawpularity'].max()
    else:
        img_folder = test_folder

    # add 'Path' column
    df['Path'] = [os.path.join(img_folder, img_id) + '.jpg'
                  for img_id in df['Id']]

    # Data Shuffling (positional, so it is correct for any index labels)
    if shuffle:
        df = df.iloc[np.random.permutation(len(df))].reset_index(drop=True)

    # split validation data
    if nfolds is not None:
        # Honour the `nfolds` argument instead of the global config value.
        kf = KFold(n_splits=nfolds)
        # KFold yields positions, so fill a positional array rather than
        # indexing the frame by label; float dtype matches the previous
        # NaN-initialised column.
        folds = np.full(len(df), np.nan)
        for nfold, (_, val_index) in enumerate(kf.split(df)):
            folds[val_index] = nfold
        df['fold'] = folds

    return df

# Add the 'Path' column to the test table; row order is kept (no shuffling)
# so predictions line up with the 'Id' column.
test_df = preprocess_dataframe(test_df, mode='test', shuffle=False)
test_df

In [None]:
class TestImageSequence(keras.utils.Sequence):
    """Keras Sequence yielding image batches (no targets) for prediction.

    Args:
        df: DataFrame with a 'Path' column of image file paths.
        batch_size: number of rows per batch; the last batch may be shorter.
    """

    def __init__(self, df, batch_size=config['batch_size']):
        super().__init__()
        self.l = None  # cached number of batches, set by __len__
        self.x = df.Path
        self.y = None
        self.num_samples = len(df)
        self.batch_size = batch_size

    def __len__(self):
        """Return the number of batches, counting a trailing partial batch."""
        # Ceil division without floats.
        self.l = (self.num_samples + self.batch_size - 1) // self.batch_size
        return self.l

    def __getitem__(self, idx):
        """Return the image batch number `idx`."""
        start = idx * self.batch_size
        # Positional slicing clamps at the end of the data, so the last batch
        # is simply shorter. The previous bounds check compared the sample
        # offset against the batch count (`self.l`), which returned every
        # remaining row for any batch_size > 1 and failed when `__len__` had
        # not been called yet.
        batch_x = self.x.iloc[start:start + self.batch_size]
        return load_and_preprocessing_img(batch_x)
    
# batch_size=1 is used here because submissions failed with errors whenever a
# larger batch size was used.
test_gen = TestImageSequence(test_df, batch_size=1)
print(len(test_gen))

# Pull the first batch as a smoke test and print its shape.
sample =  next(iter(test_gen))
print(sample.shape)

### 4.2 Model Ensemble

In [None]:
# Predict with each model and map the outputs back to the original target
# range by multiplying with `scaler`.
# `Model.predict` accepts a `keras.utils.Sequence` directly; the deprecated
# `predict_generator` wrapper is no longer needed.
pred_score_1 = model_1.predict(test_gen)
pred_score_1 = pred_score_1 * scaler # scaler == train_df['Pawpularity'].max()

pred_score_2 = model_2.predict(test_gen)
pred_score_2 = pred_score_2 * scaler # scaler == train_df['Pawpularity'].max()

pred_score_3 = model_3.predict(test_gen)
pred_score_3 = pred_score_3 * scaler # scaler == train_df['Pawpularity'].max()

print(pred_score_1)
print(pred_score_2)
print(pred_score_3)

In [None]:
# Ensemble: unweighted element-wise mean of the three models' predictions.
# NOTE(review): config['blend_weight'] is not used here.
pred_score = np.mean([pred_score_1, pred_score_2, pred_score_3], axis=0)

print(pred_score.shape)

In [None]:
# Attach the blended predictions to the test table and write the submission
# file with the 'Id' and 'Pawpularity' columns.
# NOTE(review): pred_score presumably has shape (n_test, 1) as returned by the
# single-output models -- confirm it is assigned as one column.
test_df['Pawpularity'] = pred_score
submission_df = test_df[['Id', 'Pawpularity']]

submission_df.to_csv("submission.csv", index=False)
submission_df