# PetFinder.my Pawpularity Score / Tuner

## Imports

In [1]:
# Imports
import os
import numpy as np
from IPython.display import display

# Import the PetFinder Pawpularity helper library (project-local module)
import petfinder_pawpularity_lib as mllib
# Obtain the TensorFlow distribution strategy from the helper library; it is
# used further down as a context manager (`tf_strategy.scope()`) around tuning.
# NOTE(review): the version/strategy strings shown in this cell's output are
# presumably emitted by this call — confirm in the library.
tf_strategy = mllib.tf_strategy()

'TensorFlow Version: 2.6.0'

'TensorFlow Strategy: _DefaultDistributionStrategy'

In [2]:
# Render inline figures in the "retina" (high-DPI) figure format
%config InlineBackend.figure_format = "retina"

# Load the TensorBoard notebook extension
%load_ext tensorboard

## Settings

### Settings definitions

In [3]:
# Settings map: reuse the dictionary already present in the kernel namespace
# (so re-running this cell keeps previously registered modes); otherwise start
# from an empty one.
settingsMap = globals().get("settingsMap", {})

In [4]:
# Mode "tuner-local-cut": cut dataset (ratio 0.2), random search, 2 trials x 1 epoch
settingsMap["tuner-local-cut"] = dict(
    # --- General ---
    debug=False,
    # --- Model / tuner directories ---
    model_load_dir=os.path.join("models"),
    model_save_dir=os.path.join("models"),
    tuner_save_dir=os.path.join("tuner"),
    # --- Dataset locations ---
    dataset_dir_src=os.path.join("..", "input", "petfinder-pawpularity-score"),
    dataset_dir_cut=os.path.join("..", "input", "petfinder-pawpularity-score"),
    dataset_dir_copy=os.path.join("dataset-copy"),
    # --- Dataset loading ---
    dataset_batch_size=32,
    dataset_image_size=(150, 150),
    dataset_cut_ratio=0.2,
    dataset_shrink_ratio=1.0,
    dataset_split_ratios=[0.7, 0.2, 0.1],
    dataset_shuffle=False,
    dataset_shuffle_seed=42,
    dataset_prefetch=mllib.tf.data.AUTOTUNE,
    # --- Tuner ---
    tuner_project_name="petfinder-pawpularity",
    tuner_type="random",
    tuner_max_epochs=1,
    tuner_max_trials=2,
    tuner_executions_per_trial=1,
    tuner_seed=42,
    # --- Hyperparameter search space ---
    tuner_hyperparameter_model_base=["xception", "efficientnetb3", "efficientnetb5", "efficientnetb7"],
    tuner_hyperparameter_dropout_rate=[0.0, 0.3, 0.5],
    tuner_hyperparameter_learning_rate=[1e-4, 1e-3, 1e-2],
    tuner_hyperparameter_input_shape=["75x75"],
    tuner_hyperparameter_dense_layers=["0", "256x128"],
    # --- Cleanup ---
    cleanup_data_flag=False,
)

In [5]:
# Mode "tuner-local-full": full dataset (cut ratio 1.0), random search, 2 trials x 3 epochs
settingsMap["tuner-local-full"] = dict(
    # --- General ---
    debug=False,
    # --- Model / tuner directories ---
    model_load_dir=os.path.join("models"),
    model_save_dir=os.path.join("models"),
    tuner_save_dir=os.path.join("tuner"),
    # --- Dataset locations ---
    dataset_dir_src=os.path.join("..", "input", "petfinder-pawpularity-score"),
    dataset_dir_cut=os.path.join("..", "input", "petfinder-pawpularity-score"),
    dataset_dir_copy=os.path.join("dataset-copy"),
    # --- Dataset loading ---
    dataset_batch_size=64,
    dataset_image_size=(150, 150),
    dataset_cut_ratio=1.0,
    dataset_shrink_ratio=1.0,
    dataset_split_ratios=[0.7, 0.2, 0.1],
    dataset_shuffle=False,
    dataset_shuffle_seed=42,
    dataset_prefetch=mllib.tf.data.AUTOTUNE,
    # --- Tuner ---
    tuner_project_name="petfinder-pawpularity",
    tuner_type="random",
    tuner_max_epochs=3,
    tuner_max_trials=2,
    tuner_executions_per_trial=1,
    tuner_seed=42,
    # --- Hyperparameter search space ---
    tuner_hyperparameter_model_base=["xception", "efficientnetb3", "efficientnetb5", "efficientnetb7"],
    tuner_hyperparameter_dropout_rate=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5],
    tuner_hyperparameter_learning_rate=[1e-4, 5e-4, 1e-3, 5e-3, 1e-2],
    tuner_hyperparameter_input_shape=["75x75", "150x150", "250x250"],
    tuner_hyperparameter_dense_layers=["100"],
    # --- Cleanup ---
    cleanup_data_flag=False,
)

In [6]:
# Mode "tuner-remote-cut": cut dataset (ratio 0.2), bayesian search, 50 trials x 3 epochs
settingsMap["tuner-remote-cut"] = dict(
    # --- General ---
    debug=False,
    # --- Model / tuner directories ---
    model_load_dir=os.path.join("..", "input", "petfinder-pawpularity-train", "models"),
    model_save_dir=os.path.join("models"),
    tuner_save_dir=os.path.join("tuner"),
    # --- Dataset locations ---
    dataset_dir_src=os.path.join("..", "input", "petfinder-pawpularity-score"),
    dataset_dir_cut=os.path.join("dataset", "petfinder-pawpularity-score"),
    dataset_dir_copy=os.path.join("dataset-copy"),
    # --- Dataset loading ---
    dataset_batch_size=64,
    dataset_image_size=(1000, 1000),
    dataset_cut_ratio=0.2,
    dataset_shrink_ratio=1.0,
    dataset_split_ratios=[0.7, 0.2, 0.1],
    dataset_shuffle=False,
    dataset_shuffle_seed=42,
    dataset_prefetch=mllib.tf.data.AUTOTUNE,
    # --- Tuner ---
    tuner_project_name="petfinder-pawpularity",
    tuner_type="bayesian",
    tuner_max_epochs=3,
    tuner_max_trials=50,
    tuner_executions_per_trial=1,
    tuner_seed=42,
    # --- Hyperparameter search space ---
    tuner_hyperparameter_model_base=["xception", "efficientnetb7"],
    tuner_hyperparameter_dropout_rate=[0.1, 0.2, 0.3, 0.4, 0.5],
    tuner_hyperparameter_learning_rate=[1e-4, 5e-4, 1e-3, 5e-3, 1e-2],
    tuner_hyperparameter_input_shape=["75x75", "150x150", "250x250", "500x500"],
    tuner_hyperparameter_dense_layers=["100"],
    # --- Cleanup ---
    cleanup_data_flag=True,
)

In [7]:
# Mode "tuner-remote-full": full dataset (cut ratio 1.0), bayesian search, 50 trials x 3 epochs
settingsMap["tuner-remote-full"] = dict(
    # --- General ---
    debug=False,
    # --- Model / tuner directories ---
    model_load_dir=os.path.join("..", "input", "petfinder-pawpularity-train", "models"),
    model_save_dir=os.path.join("models"),
    tuner_save_dir=os.path.join("tuner"),
    # --- Dataset locations ---
    dataset_dir_src=os.path.join("..", "input", "petfinder-pawpularity-score"),
    dataset_dir_cut=os.path.join("dataset", "petfinder-pawpularity-score"),
    dataset_dir_copy=os.path.join("dataset-copy"),
    # --- Dataset loading ---
    dataset_batch_size=64,
    dataset_image_size=(1000, 1000),
    dataset_cut_ratio=1.0,
    dataset_shrink_ratio=1.0,
    dataset_split_ratios=[0.90, 0.05, 0.05],
    dataset_shuffle=False,
    dataset_shuffle_seed=42,
    dataset_prefetch=mllib.tf.data.AUTOTUNE,
    # --- Tuner ---
    tuner_project_name="petfinder-pawpularity",
    tuner_type="bayesian",
    tuner_max_epochs=3,
    tuner_max_trials=50,
    tuner_executions_per_trial=1,
    tuner_seed=42,
    # --- Hyperparameter search space ---
    tuner_hyperparameter_model_base=["xception", "efficientnetb7"],
    tuner_hyperparameter_dropout_rate=[0.1, 0.2, 0.3, 0.4, 0.5],
    tuner_hyperparameter_learning_rate=[1e-4, 5e-4, 1e-3, 5e-3, 1e-2],
    tuner_hyperparameter_input_shape=["75x75", "150x150", "250x250", "500x500"],
    tuner_hyperparameter_dense_layers=["100"],
    # --- Cleanup ---
    cleanup_data_flag=True,
)

### Selected settings

In [8]:
# Mode: key into settingsMap selecting the active configuration.
# Keys registered above: "tuner-local-cut", "tuner-local-full",
# "tuner-remote-cut", "tuner-remote-full".
mode = "tuner-local-cut"

In [9]:
# Selected settings: look up the configuration registered for `mode` and show it
settings = settingsMap[mode]
display(settings)

# Debug flag: gates the extra displays/plots in the data-preparation cell
debug = settings["debug"]

{'debug': False,
 'model_load_dir': 'models',
 'model_save_dir': 'models',
 'tuner_save_dir': 'tuner',
 'dataset_dir_src': '../input/petfinder-pawpularity-score',
 'dataset_dir_cut': '../input/petfinder-pawpularity-score',
 'dataset_dir_copy': 'dataset-copy',
 'dataset_batch_size': 32,
 'dataset_image_size': (150, 150),
 'dataset_cut_ratio': 0.2,
 'dataset_shrink_ratio': 1.0,
 'dataset_split_ratios': [0.7, 0.2, 0.1],
 'dataset_shuffle': False,
 'dataset_shuffle_seed': None,
 'dataset_prefetch': -1,
 'tuner_project_name': 'petfinder-pawpularity',
 'tuner_type': 'random',
 'tuner_max_epochs': 1,
 'tuner_max_trials': 2,
 'tuner_executions_per_trial': 1,
 'tuner_seed': None,
 'tuner_hyperparameter_model_base': ['xception',
  'efficientnetb3',
  'efficientnetb5',
  'efficientnetb7'],
 'tuner_hyperparameter_dropout_rate': [0.0, 0.3, 0.5],
 'tuner_hyperparameter_learning_rate': [0.0001, 0.001, 0.01],
 'tuner_hyperparameter_input_shape': ['75x75'],
 'tuner_hyperparameter_dense_layers': ['0', '256x128'],
 'cleanup_data_flag': False}

## Prepare data

In [10]:
%%time
# Cut training data
dataset_dir = mllib.cut_training_data(
    cut_ratio=settings["dataset_cut_ratio"], 
    dataset_dir_src=settings["dataset_dir_src"], 
    dataset_dir_cut=settings["dataset_dir_cut"]
)
display(dataset_dir)

# Copy train.csv to output (It may be different with submission dataset)
if settings["dataset_dir_copy"] is not None:
    mllib.copy_file(os.path.join(dataset_dir, "train.csv"), os.path.join(settings["dataset_dir_copy"], "train%s.csv" % mllib.cut_suffix(settings["dataset_cut_ratio"])))

# Train data
training_data = mllib.load_training_data(dataset_dir)
if debug: 
    display(training_data)
    training_data.hist(bins=500, figsize=(18,3))

# Make training data
map_image_score_fn = lambda image, features, score, file_id: (image, score)  
train_dataset, validate_dataset, test_dataset = mllib.make_training_validate_test_data(
    dataset=mllib.load_training_dataset(
        dataset_dir=dataset_dir,
        mapping_data=training_data,
        batch_size=settings["dataset_batch_size"],
        shuffle=settings["dataset_shuffle"],
        seed=settings["dataset_shuffle_seed"],
        image_size=settings["dataset_image_size"],
    ),
    split_ratios=settings["dataset_split_ratios"],
    shrink_ratio=settings["dataset_shrink_ratio"],
    prefetch=settings["dataset_prefetch"],
)
 
# Training data infos
print("Train / Validate / Test datasets items: %s / %s / %s" % (
    settings["dataset_batch_size"] * train_dataset().cardinality().numpy(), 
    settings["dataset_batch_size"] * validate_dataset().cardinality().numpy(), 
    settings["dataset_batch_size"] * test_dataset().cardinality().numpy()
))
if debug:
    print("")
    print("Train dataset:")
    mllib.plot_images_scores_from_dataset(train_dataset().take(1).map(map_image_score_fn))
    print("Validate dataset:")
    mllib.plot_images_scores_from_dataset(validate_dataset().take(1).map(map_image_score_fn))
    print("Test dataset:")
    mllib.plot_images_scores_from_dataset(test_dataset().take(1).map(map_image_score_fn))

'../input/petfinder-pawpularity-score-cut-0.200'

'Copy ../input/petfinder-pawpularity-score-cut-0.200/train.csv to dataset-copy/train-cut-0.200.csv'

'Load training data from ../input/petfinder-pawpularity-score-cut-0.200/train.csv'

Train / Validate / Test datasets items: 1376 / 384 / 192
CPU times: user 1.02 s, sys: 207 ms, total: 1.23 s
Wall time: 1.02 s


## Tune model

In [11]:
# Build hypermodel
def build_hypermodel():
    """Create the model-building callback handed to the tuner.

    The fixed (non-tuned) model parameters are assembled once. The returned
    callback combines them with the hyperparameters sampled for a trial and
    delegates model construction to ``mllib.setup_model``.

    Returns:
        A function ``_build_model(hp)`` that takes a hyperparameters object
        exposing ``Choice(name, values=...)`` and returns whatever
        ``mllib.setup_model`` returns for the resulting parameter dict.
    """
    base_parameters = {
        "model_prefix": "model" + mllib.cut_suffix(settings["dataset_cut_ratio"]),
        "input_shape": [None, None, 3],
        "input_shape_features": len(mllib.feature_fields),
        "output_size": 1,
        "dense_layers_activation": "elu",
        "preload_weights": "imagenet",
        "fine_tuning": False,
    }
    base_parameters["model_name"] = base_parameters["model_prefix"]

    def _build_model(hp):
        """Build one candidate model from the hyperparameters sampled in ``hp``."""
        # Work on a fresh dict (and a fresh input_shape list) for every trial so
        # that nothing written here, or by setup_model, leaks into later trials.
        model_parameters = dict(base_parameters)
        model_parameters["input_shape"] = list(base_parameters["input_shape"])
        model_parameters["model_base"] = hp.Choice("model_base", values=settings["tuner_hyperparameter_model_base"])
        model_parameters["dropout_rate"] = hp.Choice("dropout_rate", values=settings["tuner_hyperparameter_dropout_rate"])
        model_parameters["learning_rate"] = hp.Choice("learning_rate", values=settings["tuner_hyperparameter_learning_rate"])
        # The hyperparameter is registered under the name "input_shape" but is
        # passed to the model as "image_resize"; both names are kept unchanged.
        model_parameters["image_resize"] = hp.Choice("input_shape", values=settings["tuner_hyperparameter_input_shape"])
        model_parameters["dense_layers"] = hp.Choice("dense_layers", values=settings["tuner_hyperparameter_dense_layers"])
        return mllib.setup_model(model_parameters)

    return _build_model

In [12]:
# Tune model: run the hyperparameter search inside the distribution strategy scope.
# `train_dataset` / `validate_dataset` are passed as the callables returned by the
# data-preparation cell (they are invoked as `train_dataset()` there), not as
# already-built datasets. The whole `settings` dict is handed to the helper.
with tf_strategy.scope():
    tuner = mllib.tune_model(
        hypermodel=build_hypermodel(), 
        settings=settings, 
        train_dataset=train_dataset, 
        validate_dataset=validate_dataset,
    )

Trial 2 Complete [00h 01m 21s]
val_rmse: 21.808916091918945

Best val_rmse So Far: 21.808916091918945
Total elapsed time: 00h 01m 47s
INFO:tensorflow:Oracle triggered exit
Results summary
Results in tuner/petfinder-pawpularity
Showing 10 best trials
Objective(name='val_rmse', direction='min')
Trial summary
Hyperparameters:
model_base: efficientnetb7
dropout_rate: 0.0
learning_rate: 0.01
input_shape: 75x75
dense_layers: 0
Score: 21.808916091918945
Trial summary
Hyperparameters:
model_base: xception
dropout_rate: 0.3
learning_rate: 0.01
input_shape: 75x75
dense_layers: 256x128
Score: 25.251707077026367


## Cleanup

In [13]:
%%time
# Delete the cut training data, but only for modes that set "cleanup_data_flag"
# (the two "tuner-remote-*" modes defined above set it to True).
if settings["cleanup_data_flag"]: 
    mllib.delete_training_data(cut_ratio=settings["dataset_cut_ratio"], dataset_dir_cut=settings["dataset_dir_cut"])

CPU times: user 6 µs, sys: 1 µs, total: 7 µs
Wall time: 12.6 µs
